summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2022-10-19 15:10:19 +0200
committer: Tor Egge <Tor.Egge@online.no> 2022-10-19 15:10:19 +0200
commit: 046d64903a5d56d7fdcc05c12bd642a52d00c223 (patch)
tree: 1d5e16aa6eb2fed13c559c0242e1dac3a57dd5bf /searchlib/src
parent: 3ae444406f7efa0081bcf72a31777beef6dc020f (diff)
Stop handling url data type when inverting uris. It is never used.
Diffstat (limited to 'searchlib/src')
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp 98
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h 11
2 files changed, 7 insertions, 102 deletions
diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
index 9eb629cf33b..c79d856676d 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
@@ -2,7 +2,6 @@
#include "url_field_inverter.h"
#include "field_inverter.h"
-#include <vespa/document/datatype/urldatatype.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
@@ -21,7 +20,6 @@ namespace {
static vespalib::string HOSTNAME_BEGIN("StArThOsT");
static vespalib::string HOSTNAME_END("EnDhOsT");
-const vespalib::string SPANTREE_NAME("linguistics");
static size_t
lowercaseToken(vespalib::string &dest, const char *src, size_t srcSize)
@@ -52,7 +50,6 @@ using document::IntFieldValue;
using document::SpanTree;
using document::StringFieldValue;
using document::StructFieldValue;
-using document::UrlDataType;
using document::WeightedSetFieldValue;
using search::index::Schema;
using search::index::schema::CollectionType;
@@ -112,90 +109,13 @@ UrlFieldInverter::endElement()
}
void
-UrlFieldInverter::processUrlSubField(FieldInverter *inverter,
- const StructFieldValue &field,
- vespalib::stringref subField,
- bool addAnchors)
-{
- const FieldValue::UP sfv = field.getValue(subField);
- if (!sfv) {
- return;
- }
- if (!sfv->isA(FieldValue::Type::STRING)) {
- LOG(error,
- "Illegal field type %s for URL subfield %s, expected string",
- sfv->getDataType()->getName().c_str(),
- vespalib::string(subField).data());
- return;
- }
- const auto &value = static_cast<const StringFieldValue &>(*sfv);
- if (addAnchors) {
- inverter->addWord(HOSTNAME_BEGIN);
- }
- inverter->processAnnotations(value);
- if (addAnchors) {
- inverter->addWord(HOSTNAME_END);
- }
-}
-
-void
-UrlFieldInverter::processAnnotatedUrlField(const StructFieldValue & field)
-{
- processUrlSubField(_all, field, UrlDataType::FIELD_ALL, false);
- processUrlSubField(_scheme, field, UrlDataType::FIELD_SCHEME, false);
- processUrlSubField(_host, field, UrlDataType::FIELD_HOST, false);
- processUrlSubField(_port, field, UrlDataType::FIELD_PORT, false);
- processUrlSubField(_path, field, UrlDataType::FIELD_PATH, false);
- processUrlSubField(_query, field, UrlDataType::FIELD_QUERY, false);
- processUrlSubField(_fragment, field, UrlDataType::FIELD_FRAGMENT, false);
- processUrlSubField(_hostname, field, UrlDataType::FIELD_HOST, true);
-}
-
-void
UrlFieldInverter::processUrlField(const FieldValue &url_field)
{
- if (url_field.isA(FieldValue::Type::STRING)) {
- const vespalib::string &url_str =
- static_cast<const StringFieldValue &>(url_field).getValue();
- processUrlOldStyle(url_str);
- return;
- }
- assert(url_field.isA(FieldValue::Type::STRUCT));
- const auto &field = static_cast<const StructFieldValue &>(url_field);
-
- const FieldValue::UP all_val = field.getValue("all");
- if (all_val.get() == nullptr) {
- if (_useAnnotations) {
- // New style, use annotations
- processAnnotatedUrlField(field);
- }
- return;
- }
-
- if (!all_val->isA(FieldValue::Type::STRING)) {
- LOG(error,
- "Illegal field type %s for URL subfield all, expected string",
- all_val->getDataType()->getName().c_str());
- return;
- }
- const auto &all_sfv = static_cast<const StringFieldValue &>(*all_val);
- if (_useAnnotations) {
- StringFieldValue::SpanTrees trees = all_sfv.getSpanTrees();
- const SpanTree *tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
- if (tree != nullptr) {
- // New style, use annotations
- processAnnotatedUrlField(field);
- return;
- }
- }
-
- if (_useAnnotations) {
- return;
- }
-
- // Old style, tokenize in backend
- const vespalib::string &s = all_sfv.getValue();
- processUrlOldStyle(s);
+ assert(url_field.isA(FieldValue::Type::STRING));
+ const vespalib::string &url_str =
+ static_cast<const StringFieldValue &>(url_field).getValue();
+ processUrlOldStyle(url_str);
+ return;
}
void
@@ -287,9 +207,7 @@ namespace {
bool
isUriType(const DataType &type)
{
- return type == UrlDataType::getInstance()
- || type == *DataType::STRING
- || type == *DataType::URI;
+ return type == *DataType::STRING || type == *DataType::URI;
}
}
@@ -304,7 +222,7 @@ UrlFieldInverter::invertUrlField(const FieldValue &val)
processUrlField(val);
endElement();
} else {
- throw std::runtime_error(make_string("Expected URI struct, got '%s'", val.getDataType()->getName().c_str()));
+ throw std::runtime_error(make_string("Expected URI field, got '%s'", val.getDataType()->getName().c_str()));
}
break;
case CollectionType::WEIGHTEDSET: {
@@ -403,10 +321,8 @@ UrlFieldInverter::UrlFieldInverter(index::schema::CollectionType collectionType,
_query(query),
_fragment(fragment),
_hostname(hostname),
- _useAnnotations(false),
_collectionType(collectionType)
{
}
}
-
diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h
index 2dbe3c48959..0a38985dac4 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h
@@ -19,7 +19,6 @@ class UrlFieldInverter {
FieldInverter *_fragment;
FieldInverter *_hostname;
- bool _useAnnotations;
index::schema::CollectionType _collectionType;
void startDoc(uint32_t docId);
@@ -30,13 +29,6 @@ class UrlFieldInverter {
void endElement();
- void processUrlSubField(FieldInverter *inverter,
- const document::StructFieldValue &field,
- vespalib::stringref subField,
- bool addAnchors);
-
- void processAnnotatedUrlField(const document::StructFieldValue &field);
-
void processUrlField(const document::FieldValue &url_field);
void processUrlOldStyle(const vespalib::string &s);
@@ -60,9 +52,6 @@ public:
void invertField(uint32_t docId, const document::FieldValue::UP &field);
void removeDocument(uint32_t docId);
- void setUseAnnotations(bool useAnnotations) {
- _useAnnotations = useAnnotations;
- }
void applyRemoves();
void pushDocuments();
};