about | summary | refs | log | tree | commit | diff | stats
path: root/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-09 07:35:14 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-01-10 08:12:55 +0000
commit 02c5bce07737a899726097e577c6dd1121ca5a7c (patch)
tree e6c73d2df7f9f2c55322330cbc4ba644a2bbb8e0 /streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
parent 4388490c151581bc6e04059baa04b580c80577d3 (diff)
Simplify ancient carefully hand optimized code in favour of simple readable code
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp')
-rw-r--r-- streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp 61
1 files changed, 37 insertions, 24 deletions
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
index 4b0efd58a56..22934ba74d2 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
@@ -40,6 +40,8 @@ setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) {
searcher->setMatchType(FieldSearcher::EXACT);
} else if (arg1 == "word") {
searcher->setMatchType(FieldSearcher::EXACT);
+ } else if (arg1 == "cased") {
+ searcher->setMatchType(FieldSearcher::CASED);
}
}
@@ -51,6 +53,7 @@ FieldSearchSpec::FieldSearchSpec()
_maxLength(0x100000),
_searcher(),
_searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE),
+ _normalize_mode(Normalizing::LOWERCASE_AND_FOLD),
_arg1(),
_reconfigured(false)
{
@@ -60,15 +63,15 @@ FieldSearchSpec::~FieldSearchSpec() = default;
FieldSearchSpec::FieldSearchSpec(FieldSearchSpec&& rhs) noexcept = default;
FieldSearchSpec& FieldSearchSpec::operator=(FieldSearchSpec&& rhs) noexcept = default;
-FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & fname,
- VsmfieldsConfig::Fieldspec::Searchmethod searchDef,
- const vespalib::string & arg1, size_t maxLength_) :
+FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & fname, Searchmethod searchDef,
+ Normalizing normalize_mode, vespalib::stringref arg1_in, size_t maxLength_in) :
_id(fid),
_name(fname),
- _maxLength(maxLength_),
+ _maxLength(maxLength_in),
_searcher(),
_searchMethod(searchDef),
- _arg1(arg1),
+ _normalize_mode(normalize_mode),
+ _arg1(arg1_in),
_reconfigured(false)
{
switch(searchDef) {
@@ -79,14 +82,16 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string &
case VsmfieldsConfig::Fieldspec::Searchmethod::NONE:
case VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8:
case VsmfieldsConfig::Fieldspec::Searchmethod::UTF8:
- if (arg1 == "substring") {
+ if (_arg1 == "substring") {
_searcher = std::make_unique<UTF8SubStringFieldSearcher>(fid);
- } else if (arg1 == "suffix") {
+ } else if (_arg1 == "suffix") {
_searcher = std::make_unique<UTF8SuffixStringFieldSearcher>(fid);
- } else if (arg1 == "exact") {
+ } else if (_arg1 == "exact") {
_searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
- } else if (arg1 == "word") {
+ } else if (_arg1 == "word") {
_searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
+ } else if (_arg1 == "cased") {
+ _searcher = std::make_unique<UTF8StrChrFieldSearcher>(fid);
} else if (searchDef == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) {
_searcher = std::make_unique<UTF8StrChrFieldSearcher>(fid);
} else {
@@ -112,12 +117,12 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string &
_searcher = std::make_unique<GeoPosFieldSearcher>(fid);
break;
case VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR:
- auto dm = NearestNeighborFieldSearcher::distance_metric_from_string(arg1);
+ auto dm = NearestNeighborFieldSearcher::distance_metric_from_string(_arg1);
_searcher = std::make_unique<NearestNeighborFieldSearcher>(fid, dm);
break;
}
if (_searcher) {
- setMatchType(_searcher, arg1);
+ setMatchType(_searcher, _arg1);
_searcher->maxFieldLength(maxLength());
}
}
@@ -166,20 +171,20 @@ FieldSearchSpecMap::FieldSearchSpecMap() = default;
FieldSearchSpecMap::~FieldSearchSpecMap() = default;
namespace {
- const std::string _G_empty("");
- const std::string _G_value(".value");
- const std::regex _G_map1("\\{[a-zA-Z0-9]+\\}");
- const std::regex _G_map2("\\{\".*\"\\}");
- const std::regex _G_array("\\[[0-9]+\\]");
+ const std::string G_empty;
+ const std::string G_value(".value");
+ const std::regex G_map1("\\{[a-zA-Z0-9]+\\}");
+ const std::regex G_map2("\\{\".*\"\\}");
+ const std::regex G_array("\\[[0-9]+\\]");
}
vespalib::string
FieldSearchSpecMap::stripNonFields(vespalib::stringref rawIndex)
{
if ((rawIndex.find('[') != vespalib::string::npos) || (rawIndex.find('{') != vespalib::string::npos)) {
- std::string index = std::regex_replace(std::string(rawIndex), _G_map1, _G_value);
- index = std::regex_replace(index, _G_map2, _G_value);
- index = std::regex_replace(index, _G_array, _G_empty);
+ std::string index = std::regex_replace(std::string(rawIndex), G_map1, G_value);
+ index = std::regex_replace(index, G_map2, G_value);
+ index = std::regex_replace(index, G_array, G_empty);
return index;
}
return rawIndex;
@@ -258,17 +263,26 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const FieldSearch
return ifm;
}
+search::streaming::Normalizing
+normalize_mode(VsmfieldsConfig::Fieldspec::Normalize normalize_mode) {
+ switch (normalize_mode) {
+ case VsmfieldsConfig::Fieldspec::Normalize::NONE: return search::streaming::Normalizing::NONE;
+ case VsmfieldsConfig::Fieldspec::Normalize::LOWERCASE: return search::streaming::Normalizing::LOWERCASE;
+ case VsmfieldsConfig::Fieldspec::Normalize::LOWERCASE_AND_FOLD: return search::streaming::Normalizing::LOWERCASE_AND_FOLD;
+ }
+ return search::streaming::Normalizing::LOWERCASE_AND_FOLD;
}
-bool
+}
+
+void
FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
{
- bool retval(true);
LOG(spam, "Parsing %zd fields", conf->fieldspec.size());
for(const VsmfieldsConfig::Fieldspec & cfs : conf->fieldspec) {
LOG(spam, "Parsing %s", cfs.name.c_str());
FieldIdT fieldId = specMap().size();
- FieldSearchSpec fss(fieldId, cfs.name, cfs.searchmethod, cfs.arg1.c_str(), cfs.maxlength);
+ FieldSearchSpec fss(fieldId, cfs.name, cfs.searchmethod, normalize_mode(cfs.normalize), cfs.arg1, cfs.maxlength);
_specMap[fieldId] = std::move(fss);
_nameIdMap.add(cfs.name, fieldId);
LOG(spam, "M in %d = %s", fieldId, cfs.name.c_str());
@@ -283,7 +297,6 @@ FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
}
_documentTypeMap[di.name] = indexMapp;
}
- return retval;
}
void
@@ -338,7 +351,7 @@ FieldSearchSpecMap::get_distance_metric(const vespalib::string& name) const
if (!itr->second.uses_nearest_neighbor_search_method()) {
return dm;
}
- return vsm::NearestNeighborFieldSearcher::distance_metric_from_string(itr->second.get_arg1());
+ return vsm::NearestNeighborFieldSearcher::distance_metric_from_string(itr->second.arg1());
}
vespalib::asciistream &