about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-12-19 17:02:19 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-01-04 10:42:37 +0000
commit 0f9915ebdfb2931ebb3c06ac55b537f42477256b (patch)
tree a6f3a41e547c032d773ef391da22ca04bcfcacc3
parent 5f1ec31a0decec88322835705afa26cc7d35fd3e (diff)
- Modernize code
- Unify some conversion tables.
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp 2
-rw-r--r-- streamingvisitors/src/tests/searcher/searcher_test.cpp 48
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp 152
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h 35
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp 4
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp 1
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp 13
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp 2
-rw-r--r-- vespalib/src/vespa/fastlib/text/normwordfolder.cpp 1
-rw-r--r-- vespalib/src/vespa/fastlib/text/normwordfolder.h 12
10 files changed, 103 insertions, 167 deletions
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index a658ff5f3d6..fe6f73367d7 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -9,7 +9,7 @@ namespace {
class CharInfo {
public:
CharInfo();
- uint8_t get(uint8_t c) const { return _charInfo[c]; }
+ uint8_t get(uint8_t c) const noexcept { return _charInfo[c]; }
private:
uint8_t _charInfo[256];
};
diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp
index 4492dfac02b..1ce285c2103 100644
--- a/streamingvisitors/src/tests/searcher/searcher_test.cpp
+++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp
@@ -47,7 +47,7 @@ class String
private:
const std::string & _str;
public:
- String(const std::string & str) : _str(str) {}
+ explicit String(const std::string & str) : _str(str) {}
bool operator==(const String & rhs) const {
return _str == rhs._str;
}
@@ -57,13 +57,13 @@ class Query
{
private:
void setupQuery(const StringList & terms) {
- for (size_t i = 0; i < terms.size(); ++i) {
- ParsedQueryTerm pqt = parseQueryTerm(terms[i]);
+ for (const auto & term : terms) {
+ ParsedQueryTerm pqt = parseQueryTerm(term);
ParsedTerm pt = parseTerm(pqt.second);
qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second));
}
- for (size_t i = 0; i < qtv.size(); ++i) {
- qtl.push_back(qtv[i].get());
+ for (const auto & i : qtv) {
+ qtl.push_back(i.get());
}
}
public:
@@ -72,14 +72,14 @@ public:
QueryNodeResultFactory eqnr;
std::vector<QueryTerm::UP> qtv;
QueryTermList qtl;
- Query(const StringList & terms);
+ explicit Query(const StringList & terms);
~Query();
static ParsedQueryTerm parseQueryTerm(const std::string & queryTerm) {
size_t i = queryTerm.find(':');
if (i != std::string::npos) {
- return ParsedQueryTerm(queryTerm.substr(0, i), queryTerm.substr(i + 1));
+ return {queryTerm.substr(0, i), queryTerm.substr(i + 1)};
}
- return ParsedQueryTerm(std::string(), queryTerm);
+ return {std::string(), queryTerm};
}
static ParsedTerm parseTerm(const std::string & term) {
if (term[0] == '*' && term[term.size() - 1] == '*') {
@@ -254,8 +254,8 @@ getFieldValue(const StringList & fv)
static ArrayDataType type(*DataType::STRING);
ArrayFieldValue afv(type);
- for (size_t i = 0; i < fv.size(); ++i) {
- afv.add(StringFieldValue(fv[i]));
+ for (const auto & v : fv) {
+ afv.add(StringFieldValue(v));
}
return afv;
}
@@ -265,8 +265,8 @@ getFieldValue(const LongList & fv)
{
static ArrayDataType type(*DataType::LONG);
ArrayFieldValue afv(type);
- for (size_t i = 0; i < fv.size(); ++i) {
- afv.add(LongFieldValue(fv[i]));
+ for (long v : fv) {
+ afv.add(LongFieldValue(v));
}
return afv;
}
@@ -276,8 +276,8 @@ getFieldValue(const FloatList & fv)
{
static ArrayDataType type(*DataType::FLOAT);
ArrayFieldValue afv(type);
- for (size_t i = 0; i < fv.size(); ++i) {
- afv.add(FloatFieldValue(fv[i]));
+ for (float v : fv) {
+ afv.add(FloatFieldValue(v));
}
return afv;
}
@@ -299,8 +299,8 @@ void
assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const BoolList & exp)
{
HitsList hl;
- for (size_t i = 0; i < exp.size(); ++i) {
- hl.push_back(exp[i] ? Hits().add(0) : Hits());
+ for (bool v : exp) {
+ hl.push_back(v ? Hits().add(0) : Hits());
}
assertSearch(fs, query, fv, hl);
}
@@ -316,7 +316,7 @@ performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & f
// setup document
SharedFieldPathMap sfim(new FieldPathMapT());
- sfim->push_back(FieldPath());
+ sfim->emplace_back();
StorageDocument doc(std::make_unique<document::Document>(), sfim, 1);
doc.setField(0, document::FieldValue::UP(fv.clone()));
@@ -369,7 +369,7 @@ assertSnippetModifier(const StringList & query, const std::string & fv, const st
void assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, const std::string & exp)
{
FieldValue::UP mfv = setup.modifier.modify(fv);
- const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(*mfv.get());
+ const auto & lfv = static_cast<const document::LiteralFieldValueB &>(*mfv.get());
const std::string & actual = lfv.getValue();
EXPECT_EQUAL(actual.size(), exp.size());
EXPECT_EQUAL(actual, exp);
@@ -377,11 +377,11 @@ void assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv,
void assertQueryTerms(const SnippetModifierManager & man, FieldIdT fId, const StringList & terms)
{
- if (terms.size() == 0) {
- ASSERT_TRUE(man.getModifiers().getModifier(fId) == NULL);
+ if (terms.empty()) {
+ ASSERT_TRUE(man.getModifiers().getModifier(fId) == nullptr);
return;
}
- ASSERT_TRUE(man.getModifiers().getModifier(fId) != NULL);
+ ASSERT_TRUE(man.getModifiers().getModifier(fId) != nullptr);
UTF8SubstringSnippetModifier * searcher =
(static_cast<SnippetModifier *>(man.getModifiers().getModifier(fId)))->getSearcher().get();
EXPECT_EQUAL(searcher->getQueryTerms().size(), terms.size());
@@ -466,7 +466,7 @@ testStrChrFieldSearcher(StrChrFieldSearcher & fs)
TEST("verify correct term parsing") {
ASSERT_TRUE(Query::parseQueryTerm("index:term").first == "index");
ASSERT_TRUE(Query::parseQueryTerm("index:term").second == "term");
- ASSERT_TRUE(Query::parseQueryTerm("term").first == "");
+ ASSERT_TRUE(Query::parseQueryTerm("term").first.empty());
ASSERT_TRUE(Query::parseQueryTerm("term").second == "term");
ASSERT_TRUE(Query::parseTerm("*substr*").first == "substr");
ASSERT_TRUE(Query::parseTerm("*substr*").second == TermType::SUBSTRINGTERM);
@@ -822,13 +822,13 @@ TEST("snippet modifier manager") {
Query query(StringList().add("i2:foo").add("i2:*bar*"));
man.setup(query.qtl, specMap, indexMap, *env.field_paths, env.query_env);
{
- SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(0));
+ auto * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(0));
UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get();
EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u);
EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u);
}
{
- SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(1));
+ auto * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(1));
UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get();
EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u);
EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u);
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
index c797e6751ee..851606634cc 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
@@ -28,26 +28,24 @@ static force __forceInit;
byte FieldSearcher::_foldLowCase[256];
byte FieldSearcher::_wordChar[256];
-FieldSearcherBase::FieldSearcherBase() :
- _qtl(),
- _qtlFastBuffer(),
- _qtlFastSize(0),
- _qtlFast(nullptr)
+FieldSearcherBase::FieldSearcherBase() noexcept
+ : _qtl(),
+ _qtlFastBuffer(),
+ _qtlFastSize(0),
+ _qtlFast(nullptr)
{
}
-FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org) :
- _qtl(),
- _qtlFastBuffer(),
- _qtlFastSize(0),
- _qtlFast(nullptr)
+FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org)
+ : _qtl(),
+ _qtlFastBuffer(),
+ _qtlFastSize(0),
+ _qtlFast(nullptr)
{
prepare(org._qtl);
}
-FieldSearcherBase::~FieldSearcherBase()
-{
-}
+FieldSearcherBase::~FieldSearcherBase() = default;
FieldSearcherBase & FieldSearcherBase::operator = (const FieldSearcherBase & org)
{
@@ -68,20 +66,16 @@ void FieldSearcherBase::prepare(const QueryTermList & qtl)
}
}
-FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) :
- FieldSearcherBase(),
- _field(fId),
- _matchType(defaultPrefix ? PREFIX : REGULAR),
- _maxFieldLength(0x100000),
- _currentElementId(0),
- _currentElementWeight(1),
- _pureUsAsciiCount(0),
- _pureUsAsciiFieldCount(0),
- _anyUtf8Count(0),
- _anyUtf8FieldCount(0),
- _words(0),
- _badUtf8Count(0),
- _zeroCount(0)
+FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept
+ : FieldSearcherBase(),
+ _field(fId),
+ _matchType(defaultPrefix ? PREFIX : REGULAR),
+ _maxFieldLength(0x100000),
+ _currentElementId(0),
+ _currentElementWeight(1),
+ _words(0),
+ _badUtf8Count(0),
+ _zeroCount(0)
{
zeroStat();
}
@@ -136,26 +130,10 @@ void FieldSearcher::prepareFieldId()
}
}
-void FieldSearcher::addStat(const FieldSearcher & toAdd)
-{
- _pureUsAsciiCount += toAdd._pureUsAsciiCount;
- _pureUsAsciiFieldCount += toAdd._pureUsAsciiFieldCount;
- _anyUtf8Count += toAdd._anyUtf8Count;
- _anyUtf8FieldCount += toAdd._anyUtf8FieldCount;
- _badUtf8Count += toAdd._badUtf8Count;
- _zeroCount += toAdd._zeroCount;
- for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] += toAdd._utf8Count[i]; }
-}
-
void FieldSearcher::zeroStat()
{
- _pureUsAsciiCount = 0;
- _pureUsAsciiFieldCount = 0;
- _anyUtf8Count = 0;
- _anyUtf8FieldCount = 0;
_badUtf8Count = 0;
_zeroCount = 0;
- for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] = 0; }
}
void FieldSearcher::init()
@@ -182,43 +160,53 @@ void FieldSearcher::init()
_wordChar[0xd7] = 0;
_wordChar[0xf7] = 0;
- if (1) /* _doAccentRemoval */ {
- _foldLowCase[0xc0] = 'a';
- _foldLowCase[0xc1] = 'a';
- _foldLowCase[0xc2] = 'a';
- _foldLowCase[0xc3] = 'a'; // A tilde
- _foldLowCase[0xc7] = 'c';
- _foldLowCase[0xc8] = 'e';
- _foldLowCase[0xc9] = 'e';
- _foldLowCase[0xca] = 'e';
- _foldLowCase[0xcb] = 'e';
- _foldLowCase[0xcc] = 'i'; // I grave
- _foldLowCase[0xcd] = 'i';
- _foldLowCase[0xce] = 'i';
- _foldLowCase[0xcf] = 'i';
- _foldLowCase[0xd3] = 'o';
- _foldLowCase[0xd4] = 'o';
- _foldLowCase[0xda] = 'u';
- _foldLowCase[0xdb] = 'u';
-
- _foldLowCase[0xe0] = 'a';
- _foldLowCase[0xe1] = 'a';
- _foldLowCase[0xe2] = 'a';
- _foldLowCase[0xe3] = 'a'; // a tilde
- _foldLowCase[0xe7] = 'c';
- _foldLowCase[0xe8] = 'e';
- _foldLowCase[0xe9] = 'e';
- _foldLowCase[0xea] = 'e';
- _foldLowCase[0xeb] = 'e';
- _foldLowCase[0xec] = 'i'; // i grave
- _foldLowCase[0xed] = 'i';
- _foldLowCase[0xee] = 'i';
- _foldLowCase[0xef] = 'i';
- _foldLowCase[0xf3] = 'o';
- _foldLowCase[0xf4] = 'o';
- _foldLowCase[0xfa] = 'u';
- _foldLowCase[0xfb] = 'u';
- }
+ _foldLowCase[0xc0] = 'a';
+ _foldLowCase[0xc1] = 'a';
+ _foldLowCase[0xc2] = 'a';
+ _foldLowCase[0xc3] = 'a';
+ _foldLowCase[0xc7] = 'c';
+ _foldLowCase[0xc8] = 'e';
+ _foldLowCase[0xc9] = 'e';
+ _foldLowCase[0xca] = 'e';
+ _foldLowCase[0xcb] = 'e';
+ _foldLowCase[0xcc] = 'i';
+ _foldLowCase[0xcd] = 'i';
+ _foldLowCase[0xce] = 'i';
+ _foldLowCase[0xcf] = 'i';
+ _foldLowCase[0xd1] = 'n';
+ _foldLowCase[0xd2] = 'o';
+ _foldLowCase[0xd3] = 'o';
+ _foldLowCase[0xd4] = 'o';
+ _foldLowCase[0xd5] = 'o';
+ _foldLowCase[0xd9] = 'u';
+ _foldLowCase[0xda] = 'u';
+ _foldLowCase[0xdb] = 'u';
+ _foldLowCase[0xdc] = 'u';
+ _foldLowCase[0xdd] = 'y';
+ _foldLowCase[0xe0] = 'a';
+ _foldLowCase[0xe1] = 'a';
+ _foldLowCase[0xe2] = 'a';
+ _foldLowCase[0xe3] = 'a';
+ _foldLowCase[0xe7] = 'c';
+ _foldLowCase[0xe8] = 'e';
+ _foldLowCase[0xe9] = 'e';
+ _foldLowCase[0xea] = 'e';
+ _foldLowCase[0xeb] = 'e';
+ _foldLowCase[0xec] = 'i';
+ _foldLowCase[0xed] = 'i';
+ _foldLowCase[0xee] = 'i';
+ _foldLowCase[0xef] = 'i';
+ _foldLowCase[0xf1] = 'n';
+ _foldLowCase[0xf2] = 'o';
+ _foldLowCase[0xf3] = 'o';
+ _foldLowCase[0xf4] = 'o';
+ _foldLowCase[0xf5] = 'o';
+ _foldLowCase[0xf9] = 'u';
+ _foldLowCase[0xfa] = 'u';
+ _foldLowCase[0xfb] = 'u';
+ _foldLowCase[0xfc] = 'u';
+ _foldLowCase[0xfd] = 'y';
+ _foldLowCase[0xff] = 'y';
}
void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm,
@@ -296,10 +284,10 @@ FieldSearcher::IteratorHandler::onCollectionStart(const Content & c)
const document::FieldValue & fv = c.getValue();
LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str());
if (fv.isA(document::FieldValue::Type::ARRAY)) {
- const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(fv);
+ const auto & afv = static_cast<const document::ArrayFieldValue &>(fv);
LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size());
} else if (fv.isA(document::FieldValue::Type::WSET)) {
- const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv);
+ const auto & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv);
LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size());
}
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
index e79dacf827e..c231a96711c 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
@@ -14,12 +14,6 @@ namespace vsm {
using termcount_t = size_t;
using termsize_t = size_t;
-#if defined(COLLECT_CHAR_STAT)
- #define NEED_CHAR_STAT(a) { a; }
-#else
- #define NEED_CHAR_STAT(a)
-#endif
-
using ucs4_t = uint32_t;
using cmptype_t = ucs4_t;
using SearcherBuf = vespalib::Array<cmptype_t>;
@@ -33,9 +27,9 @@ protected:
private:
CharVector _qtlFastBuffer;
protected:
- FieldSearcherBase();
+ FieldSearcherBase() noexcept;
FieldSearcherBase(const FieldSearcherBase & org);
- virtual ~FieldSearcherBase(void);
+ virtual ~FieldSearcherBase();
FieldSearcherBase & operator = (const FieldSearcherBase & org);
void prepare(const search::streaming::QueryTermList & qtl);
size_t _qtlFastSize;
@@ -53,7 +47,8 @@ public:
EXACT
};
- FieldSearcher(FieldIdT fId, bool defaultPrefix=false);
+ explicit FieldSearcher(FieldIdT fId) noexcept : FieldSearcher(fId, false) {}
+ FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept;
~FieldSearcher() override;
virtual std::unique_ptr<FieldSearcher> duplicate() const = 0;
bool search(const StorageDocument & doc);
@@ -74,16 +69,7 @@ public:
static search::byte iswordchar(search::byte c) { return _wordChar[c]; }
static search::byte isspace(search::byte c) { return ! iswordchar(c); }
static size_t countWords(const FieldRef & f);
- unsigned pureUsAsciiCount() const { return _pureUsAsciiCount; }
- unsigned pureUsAsciiFieldCount() const { return _pureUsAsciiFieldCount; }
- unsigned anyUtf8Count() const { return _anyUtf8Count; }
- unsigned anyUtf8FieldCount() const { return _anyUtf8FieldCount; }
- unsigned badUtf8Count() const { return _badUtf8Count; }
- unsigned zeroCount() const { return _zeroCount; }
- unsigned utf8Count(size_t sz) const { return _utf8Count[1+sz]; }
- const unsigned * utf8Count() const { return _utf8Count; }
int32_t getCurrentWeight() const { return _currentElementWeight; }
- void addStat(const FieldSearcher & toAdd);
void zeroStat();
FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; }
size_t maxFieldLength() const { return _maxFieldLength; }
@@ -98,7 +84,7 @@ private:
void onStructStart(const Content & c) override;
public:
- IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {}
+ explicit IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {}
};
friend class IteratorHandler; // to allow calls to onValue();
@@ -113,24 +99,13 @@ private:
unsigned _maxFieldLength;
uint32_t _currentElementId;
int32_t _currentElementWeight; // Contains the weight of the current item being evaluated.
- /// Number of bytes in blocks containing pure us-ascii
- unsigned _pureUsAsciiCount;
- /// Number of blocks containing pure us-ascii
- unsigned _pureUsAsciiFieldCount;
- /// Number of bytes in blocks containing any non us-ascii
- unsigned _anyUtf8Count;
- /// Number of blocks containing any non us-ascii
- unsigned _anyUtf8FieldCount;
protected:
/// Number of terms searched.
unsigned _words;
/// Number of utf8 bytes by utf8 size.
- unsigned _utf8Count[6];
unsigned _badUtf8Count;
unsigned _zeroCount;
protected:
- void addPureUsAsciiField(size_t sz) { _pureUsAsciiCount += sz; _pureUsAsciiFieldCount++;; }
- void addAnyUtf8Field(size_t sz) { _anyUtf8Count += sz; _anyUtf8FieldCount++; }
/**
* Adds a hit to the given query term.
* For each call to onValue() a batch of words are processed, and the position is local to this batch.
diff --git a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp
index a2122f08995..d7d73899e53 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp
@@ -36,7 +36,7 @@ FUTF8StrChrFieldSearcher::ansiFold(const char * toFold, size_t sz, char * folded
for(size_t i=0; i < sz; i++) {
byte c = toFold[i];
if (c>=128) { retval = false; break; }
- folded[i] = FieldSearcher::_foldLowCase[c];
+ folded[i] = fold(c);
}
return retval;
}
@@ -209,7 +209,6 @@ size_t FUTF8StrChrFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt)
folded[f.size()+1] = 0x01;
memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values
return match(folded, f.size(), qt);
- NEED_CHAR_STAT(addPureUsAsciiField(f.size()));
} else {
return UTF8StrChrFieldSearcher::matchTerm(f, qt);
}
@@ -227,7 +226,6 @@ size_t FUTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t min
folded[f.size()+1] = 0x01;
memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values
return match(folded, f.size(), mintsz, &_qtl[0], _qtl.size());
- NEED_CHAR_STAT(addPureUsAsciiField(f.size()));
} else {
return UTF8StrChrFieldSearcher::matchTerms(f, mintsz);
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
index 2488d198b03..651d1dcad9f 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
@@ -42,7 +42,6 @@ UTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
}
words++;
}
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
return words;
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
index c31102ec0ab..ebdf69d0b30 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
@@ -32,10 +32,8 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
c = *p;
}
} else {
- const byte * oldP(p);
c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p);
if (Fast_UnicodeUtil::IsWordChar(c)) {
- _utf8Count[p-oldP-1]++;
const char *repl = Fast_NormalizeWordFolder::ReplacementString(c);
if (repl != nullptr) {
size_t repllen = strlen(repl);
@@ -50,8 +48,6 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
} else {
if (c == Fast_UnicodeUtil::_BadUTF8Char) {
_badUtf8Count++;
- } else {
- _utf8Count[p-oldP-1]++;
}
c = *p;
}
@@ -70,10 +66,8 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
c = *p;
}
} else {
- const byte * oldP(p);
c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p);
if (__builtin_expect(Fast_UnicodeUtil::IsWordChar(c), false)) {
- _utf8Count[p-oldP-1]++;
const char *repl = Fast_NormalizeWordFolder::ReplacementString(c);
if (repl != nullptr) {
size_t repllen = strlen(repl);
@@ -89,8 +83,6 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
} else {
if (c == Fast_UnicodeUtil::_BadUTF8Char) {
_badUtf8Count++;
- } else {
- _utf8Count[p-oldP-1]++;
}
break;
}
@@ -128,7 +120,6 @@ UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt
}
words++;
}
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
return words;
}
@@ -154,7 +145,6 @@ UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt)
addHit(qt,0);
}
}
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
return 1;
}
@@ -188,7 +178,6 @@ UTF8StringFieldSearcherBase::matchTermSubstring(const FieldRef & f, QueryTerm &
}
}
}
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
return words + 1; // we must also count the last word
}
@@ -305,8 +294,6 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T
}
if (c == Fast_UnicodeUtil::_BadUTF8Char) {
_badUtf8Count++;
- } else {
- _utf8Count[p-oldP-1]++;
}
}
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
index 88091c6ab4e..25ef9ae7618 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
@@ -45,8 +45,6 @@ UTF8SubStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
for(; (fn < fre) && ! Fast_UnicodeUtil::IsWordChar(*fn); fn++ );
}
}
-
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
return words + 1; // we must also count the last word
}
diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp
index f9dbf202fcb..8d3ccad9900 100644
--- a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp
+++ b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp
@@ -74,7 +74,6 @@ Fast_NormalizeWordFolder::Initialize()
_foldCase[0xda] = 'u';
_foldCase[0xdb] = 'u';
_foldCase[0xdd] = 'y';
-
_foldCase[0xe0] = 'a';
_foldCase[0xe1] = 'a';
_foldCase[0xe2] = 'a';
diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.h b/vespalib/src/vespa/fastlib/text/normwordfolder.h
index c596b0fd2b4..121a83e260d 100644
--- a/vespalib/src/vespa/fastlib/text/normwordfolder.h
+++ b/vespalib/src/vespa/fastlib/text/normwordfolder.h
@@ -104,20 +104,16 @@ public:
switch(testchar) {
case 0xc4:
case 0xe4: // A/a with diaeresis
+ case 0xc6:
+ case 0xe6: // Letter/ligature AE/ae
return "ae";
case 0xc5:
case 0xe5: // A/a with ring
return "aa";
- case 0xc6:
- case 0xe6: // Letter/ligature AE/ae
- return "ae";
-
case 0xd6:
case 0xf6: // O/o with diaeresis
- return "oe";
-
case 0xd8:
case 0xf8: // O/o with stroke
return "oe";
@@ -133,10 +129,6 @@ public:
case 0xde:
case 0xfe: // norse "thorn"
return "th";
-
- default:
- return nullptr;
-
}
}