diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-19 17:02:19 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-04 10:42:37 +0000 |
commit | 0f9915ebdfb2931ebb3c06ac55b537f42477256b (patch) | |
tree | a6f3a41e547c032d773ef391da22ca04bcfcacc3 /streamingvisitors/src | |
parent | 5f1ec31a0decec88322835705afa26cc7d35fd3e (diff) |
- Modernize code
- Unify some conversion tables.
Diffstat (limited to 'streamingvisitors/src')
7 files changed, 100 insertions, 155 deletions
diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp index 4492dfac02b..1ce285c2103 100644 --- a/streamingvisitors/src/tests/searcher/searcher_test.cpp +++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp @@ -47,7 +47,7 @@ class String private: const std::string & _str; public: - String(const std::string & str) : _str(str) {} + explicit String(const std::string & str) : _str(str) {} bool operator==(const String & rhs) const { return _str == rhs._str; } @@ -57,13 +57,13 @@ class Query { private: void setupQuery(const StringList & terms) { - for (size_t i = 0; i < terms.size(); ++i) { - ParsedQueryTerm pqt = parseQueryTerm(terms[i]); + for (const auto & term : terms) { + ParsedQueryTerm pqt = parseQueryTerm(term); ParsedTerm pt = parseTerm(pqt.second); qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second)); } - for (size_t i = 0; i < qtv.size(); ++i) { - qtl.push_back(qtv[i].get()); + for (const auto & i : qtv) { + qtl.push_back(i.get()); } } public: @@ -72,14 +72,14 @@ public: QueryNodeResultFactory eqnr; std::vector<QueryTerm::UP> qtv; QueryTermList qtl; - Query(const StringList & terms); + explicit Query(const StringList & terms); ~Query(); static ParsedQueryTerm parseQueryTerm(const std::string & queryTerm) { size_t i = queryTerm.find(':'); if (i != std::string::npos) { - return ParsedQueryTerm(queryTerm.substr(0, i), queryTerm.substr(i + 1)); + return {queryTerm.substr(0, i), queryTerm.substr(i + 1)}; } - return ParsedQueryTerm(std::string(), queryTerm); + return {std::string(), queryTerm}; } static ParsedTerm parseTerm(const std::string & term) { if (term[0] == '*' && term[term.size() - 1] == '*') { @@ -254,8 +254,8 @@ getFieldValue(const StringList & fv) static ArrayDataType type(*DataType::STRING); ArrayFieldValue afv(type); - for (size_t i = 0; i < fv.size(); ++i) { - afv.add(StringFieldValue(fv[i])); + for (const auto & v : fv) { + afv.add(StringFieldValue(v)); } return afv; } @@ -265,8 +265,8 @@ getFieldValue(const LongList & fv) { static ArrayDataType type(*DataType::LONG); ArrayFieldValue afv(type); - for (size_t i = 0; i < fv.size(); ++i) { - afv.add(LongFieldValue(fv[i])); + for (long v : fv) { + afv.add(LongFieldValue(v)); } return afv; } @@ -276,8 +276,8 @@ getFieldValue(const FloatList & fv) { static ArrayDataType type(*DataType::FLOAT); ArrayFieldValue afv(type); - for (size_t i = 0; i < fv.size(); ++i) { - afv.add(FloatFieldValue(fv[i])); + for (float v : fv) { + afv.add(FloatFieldValue(v)); } return afv; } @@ -299,8 +299,8 @@ void assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const BoolList & exp) { HitsList hl; - for (size_t i = 0; i < exp.size(); ++i) { - hl.push_back(exp[i] ? Hits().add(0) : Hits()); + for (bool v : exp) { + hl.push_back(v ? Hits().add(0) : Hits()); } assertSearch(fs, query, fv, hl); } @@ -316,7 +316,7 @@ performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & f // setup document SharedFieldPathMap sfim(new FieldPathMapT()); - sfim->push_back(FieldPath()); + sfim->emplace_back(); StorageDocument doc(std::make_unique<document::Document>(), sfim, 1); doc.setField(0, document::FieldValue::UP(fv.clone())); @@ -369,7 +369,7 @@ assertSnippetModifier(const StringList & query, const std::string & fv, const st void assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, const std::string & exp) { FieldValue::UP mfv = setup.modifier.modify(fv); - const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(*mfv.get()); + const auto & lfv = static_cast<const document::LiteralFieldValueB &>(*mfv.get()); const std::string & actual = lfv.getValue(); EXPECT_EQUAL(actual.size(), exp.size()); EXPECT_EQUAL(actual, exp); @@ -377,11 +377,11 @@ void assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, void assertQueryTerms(const SnippetModifierManager & man, FieldIdT fId, const StringList & terms) { - if (terms.size() == 0) { - ASSERT_TRUE(man.getModifiers().getModifier(fId) == NULL); + if (terms.empty()) { + ASSERT_TRUE(man.getModifiers().getModifier(fId) == nullptr); return; } - ASSERT_TRUE(man.getModifiers().getModifier(fId) != NULL); + ASSERT_TRUE(man.getModifiers().getModifier(fId) != nullptr); UTF8SubstringSnippetModifier * searcher = (static_cast<SnippetModifier *>(man.getModifiers().getModifier(fId)))->getSearcher().get(); EXPECT_EQUAL(searcher->getQueryTerms().size(), terms.size()); @@ -466,7 +466,7 @@ testStrChrFieldSearcher(StrChrFieldSearcher & fs) TEST("verify correct term parsing") { ASSERT_TRUE(Query::parseQueryTerm("index:term").first == "index"); ASSERT_TRUE(Query::parseQueryTerm("index:term").second == "term"); - ASSERT_TRUE(Query::parseQueryTerm("term").first == ""); + ASSERT_TRUE(Query::parseQueryTerm("term").first.empty()); ASSERT_TRUE(Query::parseQueryTerm("term").second == "term"); ASSERT_TRUE(Query::parseTerm("*substr*").first == "substr"); ASSERT_TRUE(Query::parseTerm("*substr*").second == TermType::SUBSTRINGTERM); @@ -822,13 +822,13 @@ TEST("snippet modifier manager") { Query query(StringList().add("i2:foo").add("i2:*bar*")); man.setup(query.qtl, specMap, indexMap, *env.field_paths, env.query_env); { - SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(0)); + auto * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(0)); UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get(); EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u); } { - SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(1)); + auto * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(1)); UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get(); EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u); diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp index c797e6751ee..851606634cc 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp @@ -28,26 +28,24 @@ static force __forceInit; byte FieldSearcher::_foldLowCase[256]; byte FieldSearcher::_wordChar[256]; -FieldSearcherBase::FieldSearcherBase() : - _qtl(), - _qtlFastBuffer(), - _qtlFastSize(0), - _qtlFast(nullptr) +FieldSearcherBase::FieldSearcherBase() noexcept + : _qtl(), + _qtlFastBuffer(), + _qtlFastSize(0), + _qtlFast(nullptr) { } -FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org) : - _qtl(), - _qtlFastBuffer(), - _qtlFastSize(0), - _qtlFast(nullptr) +FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org) + : _qtl(), + _qtlFastBuffer(), + _qtlFastSize(0), + _qtlFast(nullptr) { prepare(org._qtl); } -FieldSearcherBase::~FieldSearcherBase() -{ -} +FieldSearcherBase::~FieldSearcherBase() = default; FieldSearcherBase & FieldSearcherBase::operator = (const FieldSearcherBase & org) { @@ -68,20 +66,16 @@ void FieldSearcherBase::prepare(const QueryTermList & qtl) } } -FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) : - FieldSearcherBase(), - _field(fId), - _matchType(defaultPrefix ? PREFIX : REGULAR), - _maxFieldLength(0x100000), - _currentElementId(0), - _currentElementWeight(1), - _pureUsAsciiCount(0), - _pureUsAsciiFieldCount(0), - _anyUtf8Count(0), - _anyUtf8FieldCount(0), - _words(0), - _badUtf8Count(0), - _zeroCount(0) +FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept + : FieldSearcherBase(), + _field(fId), + _matchType(defaultPrefix ? PREFIX : REGULAR), + _maxFieldLength(0x100000), + _currentElementId(0), + _currentElementWeight(1), + _words(0), + _badUtf8Count(0), + _zeroCount(0) { zeroStat(); } @@ -136,26 +130,10 @@ void FieldSearcher::prepareFieldId() } } -void FieldSearcher::addStat(const FieldSearcher & toAdd) -{ - _pureUsAsciiCount += toAdd._pureUsAsciiCount; - _pureUsAsciiFieldCount += toAdd._pureUsAsciiFieldCount; - _anyUtf8Count += toAdd._anyUtf8Count; - _anyUtf8FieldCount += toAdd._anyUtf8FieldCount; - _badUtf8Count += toAdd._badUtf8Count; - _zeroCount += toAdd._zeroCount; - for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] += toAdd._utf8Count[i]; } -} - void FieldSearcher::zeroStat() { - _pureUsAsciiCount = 0; - _pureUsAsciiFieldCount = 0; - _anyUtf8Count = 0; - _anyUtf8FieldCount = 0; _badUtf8Count = 0; _zeroCount = 0; - for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] = 0; } } void FieldSearcher::init() @@ -182,43 +160,53 @@ void FieldSearcher::init() _wordChar[0xd7] = 0; _wordChar[0xf7] = 0; - if (1) /* _doAccentRemoval */ { - _foldLowCase[0xc0] = 'a'; - _foldLowCase[0xc1] = 'a'; - _foldLowCase[0xc2] = 'a'; - _foldLowCase[0xc3] = 'a'; // A tilde - _foldLowCase[0xc7] = 'c'; - _foldLowCase[0xc8] = 'e'; - _foldLowCase[0xc9] = 'e'; - _foldLowCase[0xca] = 'e'; - _foldLowCase[0xcb] = 'e'; - _foldLowCase[0xcc] = 'i'; // I grave - _foldLowCase[0xcd] = 'i'; - _foldLowCase[0xce] = 'i'; - _foldLowCase[0xcf] = 'i'; - _foldLowCase[0xd3] = 'o'; - _foldLowCase[0xd4] = 'o'; - _foldLowCase[0xda] = 'u'; - _foldLowCase[0xdb] = 'u'; - - _foldLowCase[0xe0] = 'a'; - _foldLowCase[0xe1] = 'a'; - _foldLowCase[0xe2] = 'a'; - _foldLowCase[0xe3] = 'a'; // a tilde - _foldLowCase[0xe7] = 'c'; - _foldLowCase[0xe8] = 'e'; - _foldLowCase[0xe9] = 'e'; - _foldLowCase[0xea] = 'e'; - _foldLowCase[0xeb] = 'e'; - _foldLowCase[0xec] = 'i'; // i grave - _foldLowCase[0xed] = 'i'; - _foldLowCase[0xee] = 'i'; - _foldLowCase[0xef] = 'i'; - _foldLowCase[0xf3] = 'o'; - _foldLowCase[0xf4] = 'o'; - _foldLowCase[0xfa] = 'u'; - _foldLowCase[0xfb] = 'u'; - } + _foldLowCase[0xc0] = 'a'; + _foldLowCase[0xc1] = 'a'; + _foldLowCase[0xc2] = 'a'; + _foldLowCase[0xc3] = 'a'; + _foldLowCase[0xc7] = 'c'; + _foldLowCase[0xc8] = 'e'; + _foldLowCase[0xc9] = 'e'; + _foldLowCase[0xca] = 'e'; + _foldLowCase[0xcb] = 'e'; + _foldLowCase[0xcc] = 'i'; + _foldLowCase[0xcd] = 'i'; + _foldLowCase[0xce] = 'i'; + _foldLowCase[0xcf] = 'i'; + _foldLowCase[0xd1] = 'n'; + _foldLowCase[0xd2] = 'o'; + _foldLowCase[0xd3] = 'o'; + _foldLowCase[0xd4] = 'o'; + _foldLowCase[0xd5] = 'o'; + _foldLowCase[0xd9] = 'u'; + _foldLowCase[0xda] = 'u'; + _foldLowCase[0xdb] = 'u'; + _foldLowCase[0xdc] = 'u'; + _foldLowCase[0xdd] = 'y'; + _foldLowCase[0xe0] = 'a'; + _foldLowCase[0xe1] = 'a'; + _foldLowCase[0xe2] = 'a'; + _foldLowCase[0xe3] = 'a'; + _foldLowCase[0xe7] = 'c'; + _foldLowCase[0xe8] = 'e'; + _foldLowCase[0xe9] = 'e'; + _foldLowCase[0xea] = 'e'; + _foldLowCase[0xeb] = 'e'; + _foldLowCase[0xec] = 'i'; + _foldLowCase[0xed] = 'i'; + _foldLowCase[0xee] = 'i'; + _foldLowCase[0xef] = 'i'; + _foldLowCase[0xf1] = 'n'; + _foldLowCase[0xf2] = 'o'; + _foldLowCase[0xf3] = 'o'; + _foldLowCase[0xf4] = 'o'; + _foldLowCase[0xf5] = 'o'; + _foldLowCase[0xf9] = 'u'; + _foldLowCase[0xfa] = 'u'; + _foldLowCase[0xfb] = 'u'; + _foldLowCase[0xfc] = 'u'; + _foldLowCase[0xfd] = 'y'; + _foldLowCase[0xff] = 'y'; } void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm, @@ -296,10 +284,10 @@ FieldSearcher::IteratorHandler::onCollectionStart(const Content & c) const document::FieldValue & fv = c.getValue(); LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str()); if (fv.isA(document::FieldValue::Type::ARRAY)) { - const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(fv); + const auto & afv = static_cast<const document::ArrayFieldValue &>(fv); LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size()); } else if (fv.isA(document::FieldValue::Type::WSET)) { - const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv); + const auto & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv); LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size()); } } diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h index e79dacf827e..c231a96711c 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h @@ -14,12 +14,6 @@ namespace vsm { using termcount_t = size_t; using termsize_t = size_t; -#if defined(COLLECT_CHAR_STAT) - #define NEED_CHAR_STAT(a) { a; } -#else - #define NEED_CHAR_STAT(a) -#endif - using ucs4_t = uint32_t; using cmptype_t = ucs4_t; using SearcherBuf = vespalib::Array<cmptype_t>; @@ -33,9 +27,9 @@ protected: private: CharVector _qtlFastBuffer; protected: - FieldSearcherBase(); + FieldSearcherBase() noexcept; FieldSearcherBase(const FieldSearcherBase & org); - virtual ~FieldSearcherBase(void); + virtual ~FieldSearcherBase(); FieldSearcherBase & operator = (const FieldSearcherBase & org); void prepare(const search::streaming::QueryTermList & qtl); size_t _qtlFastSize; @@ -53,7 +47,8 @@ public: EXACT }; - FieldSearcher(FieldIdT fId, bool defaultPrefix=false); + explicit FieldSearcher(FieldIdT fId) noexcept : FieldSearcher(fId, false) {} + FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept; ~FieldSearcher() override; virtual std::unique_ptr<FieldSearcher> duplicate() const = 0; bool search(const StorageDocument & doc); @@ -74,16 +69,7 @@ public: static search::byte iswordchar(search::byte c) { return _wordChar[c]; } static search::byte isspace(search::byte c) { return ! iswordchar(c); } static size_t countWords(const FieldRef & f); - unsigned pureUsAsciiCount() const { return _pureUsAsciiCount; } - unsigned pureUsAsciiFieldCount() const { return _pureUsAsciiFieldCount; } - unsigned anyUtf8Count() const { return _anyUtf8Count; } - unsigned anyUtf8FieldCount() const { return _anyUtf8FieldCount; } - unsigned badUtf8Count() const { return _badUtf8Count; } - unsigned zeroCount() const { return _zeroCount; } - unsigned utf8Count(size_t sz) const { return _utf8Count[1+sz]; } - const unsigned * utf8Count() const { return _utf8Count; } int32_t getCurrentWeight() const { return _currentElementWeight; } - void addStat(const FieldSearcher & toAdd); void zeroStat(); FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; } size_t maxFieldLength() const { return _maxFieldLength; } @@ -98,7 +84,7 @@ private: void onStructStart(const Content & c) override; public: - IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {} + explicit IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {} }; friend class IteratorHandler; // to allow calls to onValue(); @@ -113,24 +99,13 @@ private: unsigned _maxFieldLength; uint32_t _currentElementId; int32_t _currentElementWeight; // Contains the weight of the current item being evaluated. - /// Number of bytes in blocks containing pure us-ascii - unsigned _pureUsAsciiCount; - /// Number of blocks containing pure us-ascii - unsigned _pureUsAsciiFieldCount; - /// Number of bytes in blocks containing any non us-ascii - unsigned _anyUtf8Count; - /// Number of blocks containing any non us-ascii - unsigned _anyUtf8FieldCount; protected: /// Number of terms searched. unsigned _words; /// Number of utf8 bytes by utf8 size. - unsigned _utf8Count[6]; unsigned _badUtf8Count; unsigned _zeroCount; protected: - void addPureUsAsciiField(size_t sz) { _pureUsAsciiCount += sz; _pureUsAsciiFieldCount++;; } - void addAnyUtf8Field(size_t sz) { _anyUtf8Count += sz; _anyUtf8FieldCount++; } /** * Adds a hit to the given query term. * For each call to onValue() a batch of words are processed, and the position is local to this batch. diff --git a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp index a2122f08995..d7d73899e53 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp @@ -36,7 +36,7 @@ FUTF8StrChrFieldSearcher::ansiFold(const char * toFold, size_t sz, char * folded for(size_t i=0; i < sz; i++) { byte c = toFold[i]; if (c>=128) { retval = false; break; } - folded[i] = FieldSearcher::_foldLowCase[c]; + folded[i] = fold(c); } return retval; } @@ -209,7 +209,6 @@ size_t FUTF8StrChrFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) folded[f.size()+1] = 0x01; memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values return match(folded, f.size(), qt); - NEED_CHAR_STAT(addPureUsAsciiField(f.size())); } else { return UTF8StrChrFieldSearcher::matchTerm(f, qt); } @@ -227,7 +226,6 @@ size_t FUTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t min folded[f.size()+1] = 0x01; memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values return match(folded, f.size(), mintsz, &_qtl[0], _qtl.size()); - NEED_CHAR_STAT(addPureUsAsciiField(f.size())); } else { return UTF8StrChrFieldSearcher::matchTerms(f, mintsz); } diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp index 2488d198b03..651d1dcad9f 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp @@ -42,7 +42,6 @@ UTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) } words++; } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); return words; } diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp index c31102ec0ab..ebdf69d0b30 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp @@ -32,10 +32,8 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * c = *p; } } else { - const byte * oldP(p); c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); if (Fast_UnicodeUtil::IsWordChar(c)) { - _utf8Count[p-oldP-1]++; const char *repl = Fast_NormalizeWordFolder::ReplacementString(c); if (repl != nullptr) { size_t repllen = strlen(repl); @@ -50,8 +48,6 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * } else { if (c == Fast_UnicodeUtil::_BadUTF8Char) { _badUtf8Count++; - } else { - _utf8Count[p-oldP-1]++; } c = *p; } @@ -70,10 +66,8 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * c = *p; } } else { - const byte * oldP(p); c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); if (__builtin_expect(Fast_UnicodeUtil::IsWordChar(c), false)) { - _utf8Count[p-oldP-1]++; const char *repl = Fast_NormalizeWordFolder::ReplacementString(c); if (repl != nullptr) { size_t repllen = strlen(repl); @@ -89,8 +83,6 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * } else { if (c == Fast_UnicodeUtil::_BadUTF8Char) { _badUtf8Count++; - } else { - _utf8Count[p-oldP-1]++; } break; } @@ -128,7 +120,6 @@ UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt } words++; } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); return words; } @@ -154,7 +145,6 @@ UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt) addHit(qt,0); } } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); return 1; } @@ -188,7 +178,6 @@ UTF8StringFieldSearcherBase::matchTermSubstring(const FieldRef & f, QueryTerm & } } } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); return words + 1; // we must also count the last word } @@ -305,8 +294,6 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T } if (c == Fast_UnicodeUtil::_BadUTF8Char) { _badUtf8Count++; - } else { - _utf8Count[p-oldP-1]++; } } } diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp index 88091c6ab4e..25ef9ae7618 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp @@ -45,8 +45,6 @@ UTF8SubStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) for(; (fn < fre) && ! Fast_UnicodeUtil::IsWordChar(*fn); fn++ ); } } - - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); return words + 1; // we must also count the last word } |