aboutsummaryrefslogtreecommitdiffstats
path: root/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-05-15 00:40:43 +0200
committerGitHub <noreply@github.com>2022-05-15 00:40:43 +0200
commitdacf557add1c6a3ffab036cdf2f7dfdf9750b22e (patch)
tree3a9dfff58b98898e2e28c0337925f4f04e5eaeb0 /streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
parent2722ce9d1d1ec12d57ebd3833ce37b0958afb752 (diff)
Revert "Collapse vsm into streamingvisitors"
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp')
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp334
1 files changed, 0 insertions, 334 deletions
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
deleted file mode 100644
index 7043e63ec87..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ /dev/null
@@ -1,334 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "fieldsearchspec.h"
-#include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h>
-#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h>
-#include <vespa/vsm/searcher/utf8substringsearcher.h>
-#include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h>
-#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h>
-#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h>
-#include <vespa/vsm/searcher/intfieldsearcher.h>
-#include <vespa/vsm/searcher/boolfieldsearcher.h>
-#include <vespa/vsm/searcher/floatfieldsearcher.h>
-#include <vespa/vsm/searcher/geo_pos_field_searcher.h>
-#include <vespa/vespalib/stllike/asciistream.h>
-#include <regex>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.fieldsearchspec");
-
-#define DEBUGMASK 0x01
-
-using search::streaming::ConstQueryTermList;
-using search::streaming::Query;
-using search::streaming::QueryTerm;
-
-namespace vsm {
-
-namespace {
-
-void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) {
- if (arg1 == "prefix") {
- searcher->setMatchType(FieldSearcher::PREFIX);
- } else if (arg1 == "substring") {
- searcher->setMatchType(FieldSearcher::SUBSTRING);
- } else if (arg1 == "suffix") {
- searcher->setMatchType(FieldSearcher::SUFFIX);
- } else if (arg1 == "exact") {
- searcher->setMatchType(FieldSearcher::EXACT);
- } else if (arg1 == "word") {
- searcher->setMatchType(FieldSearcher::EXACT);
- }
-}
-
-}
-
-FieldSearchSpec::FieldSearchSpec() :
- _id(0),
- _name(),
- _maxLength(0x100000),
- _searcher(),
- _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE),
- _arg1(),
- _reconfigured(false)
-{
-}
-FieldSearchSpec::~FieldSearchSpec() = default;
-
-FieldSearchSpec::FieldSearchSpec(FieldSearchSpec&& rhs) noexcept = default;
-FieldSearchSpec& FieldSearchSpec::operator=(FieldSearchSpec&& rhs) noexcept = default;
-
-FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & fname,
- VsmfieldsConfig::Fieldspec::Searchmethod searchDef,
- const vespalib::string & arg1, size_t maxLength_) :
- _id(fid),
- _name(fname),
- _maxLength(maxLength_),
- _searcher(),
- _searchMethod(searchDef),
- _arg1(arg1),
- _reconfigured(false)
-{
- switch(searchDef) {
- default:
- LOG(warning, "Unknown searchdef = %d. Defaulting to AUTOUTF8", static_cast<int>(searchDef));
- [[fallthrough]];
- case VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::NONE:
- case VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::UTF8:
- if (arg1 == "substring") {
- _searcher = std::make_unique<UTF8SubStringFieldSearcher>(fid);
- } else if (arg1 == "suffix") {
- _searcher = std::make_unique<UTF8SuffixStringFieldSearcher>(fid);
- } else if (arg1 == "exact") {
- _searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
- } else if (arg1 == "word") {
- _searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
- } else if (searchDef == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) {
- _searcher = std::make_unique<UTF8StrChrFieldSearcher>(fid);
- } else {
- _searcher = std::make_unique<FUTF8StrChrFieldSearcher>(fid);
- }
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::BOOL:
- _searcher = std::make_unique<BoolFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT16:
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT32:
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT64:
- _searcher = std::make_unique<IntFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::FLOAT:
- _searcher = std::make_unique<FloatFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::DOUBLE:
- _searcher = std::make_unique<DoubleFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::GEOPOS:
- _searcher = std::make_unique<GeoPosFieldSearcher>(fid);
- break;
- }
- if (_searcher) {
- setMatchType(_searcher, arg1);
- _searcher->maxFieldLength(maxLength());
- }
-}
-
-void
-FieldSearchSpec::reconfig(const QueryTerm & term)
-{
- if (_reconfigured) {
- return;
- }
- switch (_searchMethod) {
- case VsmfieldsConfig::Fieldspec::Searchmethod::NONE:
- case VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::UTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8:
- if ((term.isSubstring() && _arg1 != "substring") ||
- (term.isSuffix() && _arg1 != "suffix") ||
- (term.isExactstring() && _arg1 != "exact") ||
- (term.isPrefix() && _arg1 == "suffix"))
- {
- _searcher = std::make_unique<UTF8FlexibleStringFieldSearcher>(id());
- // preserve the basic match property of the searcher
- setMatchType(_searcher, _arg1);
- LOG(debug, "Reconfigured to use UTF8FlexibleStringFieldSearcher (%s) for field '%s' with id '%d'",
- _searcher->prefix() ? "prefix" : "regular", name().c_str(), id());
- _reconfigured = true;
- }
- break;
- default:
- break;
- }
-}
-
-vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f)
-{
- os << f._id << ' ' << f._name << ' ';
- if ( ! f._searcher) {
- os << " No searcher defined.\n";
- }
- return os;
-}
-
-FieldSearchSpecMap::FieldSearchSpecMap() = default;
-
-FieldSearchSpecMap::~FieldSearchSpecMap() = default;
-
-namespace {
- const std::string _G_empty("");
- const std::string _G_value(".value");
- const std::regex _G_map1("\\{[a-zA-Z0-9]+\\}");
- const std::regex _G_map2("\\{\".*\"\\}");
- const std::regex _G_array("\\[[0-9]+\\]");
-}
-
-vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & rawIndex)
-{
- if ((rawIndex.find('[') != vespalib::string::npos) || (rawIndex.find('{') != vespalib::string::npos)) {
- std::string index = std::regex_replace(std::string(rawIndex), _G_map1, _G_value);
- index = std::regex_replace(index, _G_map2, _G_value);
- index = std::regex_replace(index, _G_array, _G_empty);
- return index;
- }
- return rawIndex;
-}
-
-bool FieldSearchSpecMap::buildFieldsInQuery(const Query & query, StringFieldIdTMap & fieldsInQuery) const
-{
- bool retval(true);
- ConstQueryTermList qtl;
- query.getLeafs(qtl);
-
- for (const auto & term : qtl) {
- for (const auto & dtm : documentTypeMap()) {
- const IndexFieldMapT & fim = dtm.second;
- vespalib::string rawIndex(term->index());
- vespalib::string index(stripNonFields(rawIndex));
- IndexFieldMapT::const_iterator fIt = fim.find(index);
- if (fIt != fim.end()) {
- for(FieldIdT fid : fIt->second) {
- const FieldSearchSpec & spec = specMap().find(fid)->second;
- LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.c_str(), index.c_str());
- if ((rawIndex != index) && (spec.name().find(index) == 0)) {
- vespalib::string modIndex(rawIndex);
- modIndex.append(spec.name().substr(index.size()));
- fieldsInQuery.add(modIndex, spec.id());
- } else {
- fieldsInQuery.add(spec.name(),spec.id());
- }
- }
- } else {
- LOG(warning, "No valid indexes registered for index %s", term->index().c_str());
- retval = false;
- }
- }
- }
- return retval;
-}
-
-void FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded)
-{
- for(size_t i(0), m(otherFieldsNeeded.size()); i < m; i++) {
- LOG(debug, "otherFieldsNeeded[%zd] = '%s'", i, otherFieldsNeeded[i].c_str());
- _nameIdMap.add(otherFieldsNeeded[i]);
- }
-}
-
-namespace {
-
-FieldIdTList
-buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const FieldSearchSpecMapT & specMap,
- const VsmfieldsConfig::Documenttype::IndexVector & indexes)
-{
- LOG(spam, "Index %s with %zd fields", ci.name.c_str(), ci.field.size());
- FieldIdTList ifm;
- for (const VsmfieldsConfig::Documenttype::Index::Field & cf : ci.field) {
- LOG(spam, "Parsing field %s", cf.name.c_str());
- auto foundIndex = std::find_if(indexes.begin(), indexes.end(),
- [&cf](const auto & v) { return v.name == cf.name;});
- if ((foundIndex != indexes.end()) && (cf.name != ci.name)) {
- FieldIdTList sub = buildFieldSet(*foundIndex, specMap, indexes);
- ifm.insert(ifm.end(), sub.begin(), sub.end());
- } else {
- auto foundField = std::find_if(specMap.begin(), specMap.end(),
- [&cf](const auto & v) { return v.second.name() == cf.name;} );
- if (foundField != specMap.end()) {
- ifm.push_back(foundField->second.id());
- } else {
- LOG(warning, "Field %s not defined. Ignoring....", cf.name.c_str());
- }
- }
- }
- return ifm;
-}
-
-}
-
-bool FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
-{
- bool retval(true);
- LOG(spam, "Parsing %zd fields", conf->fieldspec.size());
- for(const VsmfieldsConfig::Fieldspec & cfs : conf->fieldspec) {
- LOG(spam, "Parsing %s", cfs.name.c_str());
- FieldIdT fieldId = specMap().size();
- FieldSearchSpec fss(fieldId, cfs.name, cfs.searchmethod, cfs.arg1.c_str(), cfs.maxlength);
- _specMap[fieldId] = std::move(fss);
- _nameIdMap.add(cfs.name, fieldId);
- LOG(spam, "M in %d = %s", fieldId, cfs.name.c_str());
- }
-
- LOG(spam, "Parsing %zd document types", conf->documenttype.size());
- for(const VsmfieldsConfig::Documenttype & di : conf->documenttype) {
- IndexFieldMapT indexMapp;
- LOG(spam, "Parsing document type %s with %zd indexes", di.name.c_str(), di.index.size());
- for(const VsmfieldsConfig::Documenttype::Index & ci : di.index) {
- indexMapp[ci.name] = buildFieldSet(ci, specMap(), di.index);
- }
- _documentTypeMap[di.name] = indexMapp;
- }
- return retval;
-}
-
-void
-FieldSearchSpecMap::reconfigFromQuery(const Query & query)
-{
- ConstQueryTermList qtl;
- query.getLeafs(qtl);
-
- for (const auto & termA : qtl) {
- for (const auto & ifm : documentTypeMap()) {
- IndexFieldMapT::const_iterator itc = ifm.second.find(termA->index());
- if (itc != ifm.second.end()) {
- for (FieldIdT fid : itc->second) {
- FieldSearchSpec & spec = _specMap.find(fid)->second;
- spec.reconfig(*termA);
- }
- }
- }
- }
-}
-
-bool lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b)
-{
- return a->field() < b->field();
-}
-
-void FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap)
-{
- fieldSearcherMap.clear();
- for (const auto & entry : fieldsInQuery) {
- FieldIdT fId = entry.second;
- const FieldSearchSpec & spec = specMap().find(fId)->second;
- fieldSearcherMap.emplace_back(spec.searcher().duplicate());
- }
- std::sort(fieldSearcherMap.begin(), fieldSearcherMap.end(), lesserField);
-}
-
-
-vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df)
-{
- os << "DocumentTypeMap = \n";
- for (const auto & dtm : df.documentTypeMap()) {
- os << "DocType = " << dtm.first << "\n";
- os << "IndexMap = \n";
- for (const auto &index : dtm.second) {
- os << index.first << ": ";
- for (FieldIdT fid : index.second) {
- os << fid << ' ';
- }
- os << '\n';
- }
- }
- os << "SpecMap = \n";
- for (const auto & entry : df.specMap()) {
- os << entry.first << " = " << entry.second << '\n';
- }
- os << "NameIdMap = \n" << df.nameIdMap();
- return os;
-}
-
-}