// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldsearcher.h" #include #include #include #include LOG_SETUP(".vsm.searcher.fieldsearcher"); using search::byte; using search::streaming::Query; using search::streaming::QueryTerm; using search::streaming::QueryTermList; using search::v16qi; namespace vsm { class force { public: force() { FieldSearcher::init(); } }; static force __forceInit; byte FieldSearcher::_foldLowCase[256]; byte FieldSearcher::_wordChar[256]; FieldSearcherBase::FieldSearcherBase() : _qtl(), _qtlFastBuffer(), _qtlFastSize(0), _qtlFast(nullptr) { } FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org) : _qtl(), _qtlFastBuffer(), _qtlFastSize(0), _qtlFast(nullptr) { prepare(org._qtl); } FieldSearcherBase::~FieldSearcherBase() { } FieldSearcherBase & FieldSearcherBase::operator = (const FieldSearcherBase & org) { if (this != &org) { prepare(org._qtl); } return *this; } void FieldSearcherBase::prepare(const QueryTermList & qtl) { _qtl = qtl; _qtlFastBuffer.resize(sizeof(*_qtlFast)*(_qtl.size()+1), 0x13); _qtlFast = reinterpret_cast(reinterpret_cast(&_qtlFastBuffer[0]+15) & ~0xf); _qtlFastSize = 0; for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { const QueryTerm & qt = **it; memcpy(&_qtlFast[_qtlFastSize++], qt.getTerm(), std::min(size_t(16), qt.termLen())); } } FieldSearcher::FieldSearcher(const FieldIdT & fId, bool defaultPrefix) : FieldSearcherBase(), _field(fId), _matchType(defaultPrefix ? 
// NOTE(review): the physical line above starts with `//`, so the compiler
// treats ALL of it as a comment even though it carries what looks like real
// code: the include directives, LOG_SETUP, the using-declarations, the opening
// of `namespace vsm`, `class force`, the two static 256-entry tables, every
// FieldSearcherBase member and the first part of the FieldSearcher
// constructor. The file looks like it lost its line breaks.
// NOTE(review): several spans that would sit between `<` and `>` also appear
// to be missing (four `#include` directives without a header name,
// `reinterpret_cast(` without a target type). Restore this file from the
// pristine source rather than patching it by hand.

// Tail of the FieldSearcher constructor whose head sits at the end of the
// collapsed first line: picks the second/third operand of the
// `defaultPrefix ? PREFIX : REGULAR` match type selection, sets the maximum
// field length to 0x100000, the current element id to 0 and the current
// element weight to 1, zeroes the statistics counters and finally calls
// zeroStat().
PREFIX : REGULAR),
    _maxFieldLength(0x100000),
    _currentElementId(0),
    _currentElementWeight(1),
    _pureUsAsciiCount(0),
    _pureUsAsciiFieldCount(0),
    _anyUtf8Count(0),
    _anyUtf8FieldCount(0),
    _words(0),
    _badUtf8Count(0),
    _zeroCount(0)
{
    zeroStat();
}

// Compiler-generated destructor.
FieldSearcher::~FieldSearcher() = default;

// Runs this searcher over one document.
//
// Before searching, every query term gets its hit offset for this field set
// to the current size of the term's hit list. After onSearch(doc) has run,
// the hit count for this field is set to the number of hits appended since
// that offset, and the field length is set to _words. _words is then reset
// to 0 for the next document.
//
// Always returns true; the return value of onSearch() is not inspected.
bool FieldSearcher::search(const StorageDocument & doc)
{
    // Remember where this field's hits start in each term's hit list.
    for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
        QueryTerm & qt = **it;
        QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field());
        fInfo.setHitOffset(qt.getHitList().size());
    }
    onSearch(doc);
    // Hit count = growth of the hit list while onSearch() ran.
    for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
        QueryTerm & qt = **it;
        QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field());
        fInfo.setHitCount(qt.getHitList().size() - fInfo.getHitOffset());
        fInfo.setFieldLength(_words);
    }
    _words = 0;
    return true;
}

// Installs the query term list via FieldSearcherBase::prepare() and then
// calls prepareFieldId(). The shared searcher buffer, field path map and
// query environment parameters are unnamed and unused in this implementation.
void FieldSearcher::prepare(QueryTermList& qtl, const SharedSearcherBuf&, const vsm::FieldPathMapT&, search::fef::IQueryEnvironment&)
{
    FieldSearcherBase::prepare(qtl);
    prepareFieldId();
}

// Counts words in the byte range [f.data(), f.data() + f.size()) and returns
// the count.
//
// NOTE(review): the inner part of the loop is damaged - the text between
// `(n` and `m)` is missing, so the statement below does not parse as written
// and the exact word-boundary rule cannot be read from here.
// NOTE(review): `isspace(*n)` is evaluated BEFORE the second, cut-off
// condition. If that condition is a bounds check against `e`, the
// dereference happens before the check - confirm against the pristine source.
// NOTE(review): `*n` is a plain `char`; presumably this is the <cctype>
// isspace, which requires a value representable as unsigned char - verify.
size_t FieldSearcher::countWords(const FieldRef & f)
{
    size_t words = 0;
    const char * n = f.data();
    const char * e = n + f.size();
    for( ; n < e; ++n) {
        for (; isspace(*n) && (n m) {
            words++;
        }
    }
    return words;
}

// Calls resizeFieldId(field()) on every query term held by this searcher.
void FieldSearcher::prepareFieldId()
{
    for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
        QueryTerm & qt = **it;
        qt.resizeFieldId(field());
    }
}

// Adds the statistics counters of `toAdd` onto this searcher's counters.
//
// NOTE(review): the text is damaged from `for (size_t i=0; i` onwards. The
// remainder of this function and the beginning of a different one appear to
// have been lost and fused at `ifield();`. The loops that follow use `qtl`,
// `difm`, `onlyInIndex` and `fid`, none of which is declared in the visible
// text.
void FieldSearcher::addStat(const FieldSearcher & toAdd)
{
    _pureUsAsciiCount += toAdd._pureUsAsciiCount;
    _pureUsAsciiFieldCount += toAdd._pureUsAsciiFieldCount;
    _anyUtf8Count += toAdd._anyUtf8Count;
    _anyUtf8FieldCount += toAdd._anyUtf8FieldCount;
    _badUtf8Count += toAdd._badUtf8Count;
    _zeroCount += toAdd._zeroCount;
    for (size_t i=0; ifield();
        // For every query term, look through each document type's index map
        // for the index the term names (after stripNonFields()).
        for (QueryTermList::iterator qt = qtl.begin(), mqt = qtl.end(); qt != mqt; qt++) {
            QueryTerm * q = *qt;
            for (DocumentTypeIndexFieldMapT::const_iterator dt(difm.begin()), dmt(difm.end()); dt != dmt; dt++) {
                const IndexFieldMapT & fim = dt->second;
                IndexFieldMapT::const_iterator found = fim.find(FieldSearchSpecMap::stripNonFields(q->index()));
                if (found != fim.end()) {
                    const FieldIdTList & index = found->second;
                    // Keep the term only if the index covers field `fid`,
                    // and only once per term.
                    if ((find(index.begin(), index.end(), fid) != index.end()) && (find(onlyInIndex.begin(), onlyInIndex.end(), q) == onlyInIndex.end())) {
                        onlyInIndex.push_back(q);
                    }
                } else {
                    LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", q->index().c_str());
                }
            }
        }
        // NOTE(review): the `///` comment below runs to the end of its
        // physical line, so everything after it on that line is a comment to
        // the compiler: the `(*it)->prepare(...)` call, the "Will search in"
        // logging, FieldSearcher::onSearch, IteratorHandler::onPrimitive,
        // IteratorHandler::onCollectionStart and the header of
        // IteratorHandler::onStructStart. Two `static_cast(` calls in that
        // text also lack a target type.
        /// Should perhaps do a unique on onlyInIndex (*it)->prepare(onlyInIndex, searcherBuf, field_paths, query_env); if (LOG_WOULD_LOG(spam)) { char tmpBuf[16]; snprintf(tmpBuf, sizeof(tmpBuf), "%d", fid); tmp += tmpBuf; tmp += ", "; } } LOG(debug, "Will search in %s", tmp.c_str()); } bool FieldSearcher::onSearch(const StorageDocument & doc) { bool retval(true); size_t fNo(field()); const StorageDocument::SubDocument & sub = doc.getComplexField(fNo); if (sub.getFieldValue() != nullptr) { IteratorHandler ih(*this); sub.getFieldValue()->iterateNested(sub.getRange(), ih); } return retval; } void FieldSearcher::IteratorHandler::onPrimitive(uint32_t, const Content & c) { LOG(spam, "onPrimitive: field value '%s'", c.getValue().toString().c_str()); _searcher.setCurrentWeight(c.getWeight()); _searcher.setCurrentElementId(getArrayIndex()); _searcher.onValue(c.getValue()); } void FieldSearcher::IteratorHandler::onCollectionStart(const Content & c) { const document::FieldValue & fv = c.getValue(); LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str()); if (fv.isA(document::FieldValue::Type::ARRAY)) { const document::ArrayFieldValue & afv = static_cast(fv); LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size()); } else if (fv.isA(document::FieldValue::Type::WSET)) { const document::WeightedSetFieldValue & wsfv = static_cast(fv); LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size()); } } void FieldSearcher::IteratorHandler::onStructStart(const Content & c) { 
        // Body of IteratorHandler::onStructStart (its header is inside the
        // comment line above): logs the struct field value at spam level and
        // forwards it to the owning searcher's onStructValue().
        // NOTE(review): `static_cast(` below has lost its target type.
        LOG(spam, "onStructStart: field value '%s'", c.getValue().toString().c_str());
        _searcher.onStructValue(static_cast(c.getValue()));
    }

// Presumably closes `namespace vsm`, whose opening brace sits inside the
// collapsed first line - confirm once the line breaks are restored.
}