summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-10-19 22:26:48 +0200
committer GitHub <noreply@github.com> 2022-10-19 22:26:48 +0200
commit ed84f0e74f65585832a260ac5c35aa7c717a8f87 (patch)
tree c86d70b7d72d693092803ac02dc3fb2081f9d62b /searchlib
parent 7e817e1847ef61e66623cbe5bc52787e5986f452 (diff)
parent 2b7934faa4236c5f001a44f177cc7dbea69ee86e (diff)
Merge pull request #24510 from vespa-engine/toregge/remove-use-of-url-data-type-from-backend
Remove use of document::UrlDataType from backend
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/memoryindex/field_index/field_index_test.cpp 56
-rw-r--r-- searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp 226
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp 98
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h 11
-rw-r--r-- searchlib/src/vespa/searchlib/test/doc_builder.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/test/doc_builder.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/test/string_field_builder.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/test/string_field_builder.h 2
8 files changed, 26 insertions, 378 deletions
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index 1e6cb61d3f4..b9e933ab3e8 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -1,11 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/document/datatype/datatype.h>
-#include <vespa/document/datatype/urldatatype.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
-#include <vespa/document/fieldvalue/structfieldvalue.h>
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/diskindex/fusion.h>
@@ -48,8 +46,7 @@ using namespace index;
using document::ArrayFieldValue;
using document::Document;
-using document::StructFieldValue;
-using document::UrlDataType;
+using document::StringFieldValue;
using document::WeightedSetFieldValue;
using queryeval::RankedSearchIteratorBase;
using queryeval::SearchIterator;
@@ -1179,9 +1176,9 @@ DocBuilder::AddFieldsType
make_uri_add_fields()
{
return [](auto& header) { using namespace document::config_builder;
- header.addField("iu", UrlDataType::getInstance().getId())
- .addField("iau", Array(UrlDataType::getInstance().getId()))
- .addField("iwu", Wset(UrlDataType::getInstance().getId()));
+ header.addField("iu", document::DataType::T_URI)
+ .addField("iau", Array(document::DataType::T_URI))
+ .addField("iwu", Wset(document::DataType::T_URI));
};
}
@@ -1194,51 +1191,16 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
{
Document::UP doc;
StringFieldBuilder sfb(_b);
- sfb.url_mode(true);
- auto url_value = _b.make_url();
doc = _b.make_document("id:ns:searchdocument::10");
- url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.example.com").build());
- url_value.setValue("port", sfb.tokenize("81").build());
- url_value.setValue("path", sfb.tokenize("/fluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("4").build());
- doc->setValue("iu", url_value);
+ doc->setValue("iu", StringFieldValue("http://www.example.com:81/fluke?ab=2#4"));
auto url_array = _b.make_array("iau");
- url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.example.com").build());
- url_value.setValue("port", sfb.tokenize("82").build());
- url_value.setValue("path", sfb.tokenize("/fluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("8").build());
- url_array.add(url_value);
- url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
- url_value.setValue("path", sfb.tokenize("/fluke").build());
- url_value.setValue("fragment", sfb.tokenize("9").build());
- url_array.add(url_value);
+ url_array.add(StringFieldValue("http://www.example.com:82/fluke?ab=2#8"));
+ url_array.add(StringFieldValue("http://www.flickr.com:82/fluke?ab=2#9"));
doc->setValue("iau", url_array);
auto url_wset = _b.make_wset("iwu");
- url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.example.com").build());
- url_value.setValue("port", sfb.tokenize("83").build());
- url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("12").build());
- url_wset.add(url_value, 4);
- url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
- url_value.setValue("port", sfb.tokenize("85").build());
- url_value.setValue("path", sfb.tokenize("/fluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("13").build());
- url_wset.add(url_value, 7);
+ url_wset.add(StringFieldValue("http://www.example.com:83/fluke?ab=2#12"), 4);
+ url_wset.add(StringFieldValue("http://www.flickr.com:85/fluke?ab=2#13"), 7);
doc->setValue("iwu", url_wset);
_inv.invertDocument(10, *doc, {});
myPushDocument(_inv);
diff --git a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
index b3892d5d69a..9543b109dc4 100644
--- a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
@@ -1,11 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/memoryindex/url_field_inverter.h>
-#include <vespa/document/datatype/urldatatype.h>
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
-#include <vespa/document/fieldvalue/structfieldvalue.h>
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
#include <vespa/document/repo/configbuilder.h>
#include <vespa/document/repo/fixedtyperepo.h>
@@ -16,7 +14,6 @@
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/memoryindex/word_store.h>
#include <vespa/searchlib/test/doc_builder.h>
-#include <vespa/searchlib/test/string_field_builder.h>
#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h>
#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter_backend.h>
#include <vespa/vespalib/gtest/gtest.h>
@@ -25,13 +22,11 @@ namespace search {
using document::Document;
using document::ArrayFieldValue;
-using document::StructFieldValue;
-using document::UrlDataType;
+using document::StringFieldValue;
using document::WeightedSetFieldValue;
using index::schema::CollectionType;
using index::schema::DataType;
using search::test::DocBuilder;
-using search::test::StringFieldBuilder;
using namespace index;
@@ -45,17 +40,7 @@ Document::UP
makeDoc10Single(DocBuilder &b)
{
auto doc = b.make_document("id:ns:searchdocument::10");
- auto url_value = b.make_struct("url");
- StringFieldBuilder sfb(b);
- sfb.url_mode(true);
- url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.example.com").build());
- url_value.setValue("port", sfb.tokenize("81").build());
- url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("4").build());
- doc->setValue("url", url_value);
+ doc->setValue("url", StringFieldValue("http://www.example.com:81/fluke?ab=2#4"));
return doc;
}
@@ -63,24 +48,9 @@ Document::UP
makeDoc10Array(DocBuilder &b)
{
auto doc = b.make_document("id:ns:searchdocument::10");
- StringFieldBuilder sfb(b);
- sfb.url_mode(true);
auto url_array = b.make_array("url");
- auto url_value = b.make_url();
- url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.example.com").build());
- url_value.setValue("port", sfb.tokenize("82").build());
- url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("8").build());
- url_array.add(url_value);
- url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
- url_value.setValue("path", sfb.tokenize("/fluke").build());
- url_value.setValue("fragment", sfb.tokenize("9").build());
- url_array.add(url_value);
+ url_array.add(StringFieldValue("http://www.example.com:82/fluke?ab=2#8"));
+ url_array.add(StringFieldValue("http://www.flickr.com:82/fluke?ab=2#9"));
doc->setValue("url", url_array);
return doc;
}
@@ -89,26 +59,9 @@ Document::UP
makeDoc10WeightedSet(DocBuilder &b)
{
auto doc = b.make_document("id:ns:searchdocument::10");
- StringFieldBuilder sfb(b);
- sfb.url_mode(true);
auto url_wset = b.make_wset("url");
- auto url_value = b.make_url();
- url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.example.com").build());
- url_value.setValue("port", sfb.tokenize("83").build());
- url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("12").build());
- url_wset.add(url_value, 4);
- url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build());
- url_value.setValue("scheme", sfb.tokenize("http").build());
- url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
- url_value.setValue("port", sfb.tokenize("85").build());
- url_value.setValue("path", sfb.tokenize("/fluke").build());
- url_value.setValue("query", sfb.tokenize("ab=2").build());
- url_value.setValue("fragment", sfb.tokenize("13").build());
- url_wset.add(url_value, 7);
+ url_wset.add(StringFieldValue("http://www.example.com:83/fluke?ab=2#12"), 4);
+ url_wset.add(StringFieldValue("http://www.flickr.com:85/fluke?ab=2#13"), 7);
doc->setValue("url", url_wset);
return doc;
}
@@ -187,27 +140,23 @@ struct UrlFieldInverterTest : public ::testing::Test {
inverter->pushDocuments();
}
}
-
- void enableAnnotations() {
- _urlInverter->setUseAnnotations(true);
- }
};
UrlFieldInverterTest::~UrlFieldInverterTest() = default;
DocBuilder::AddFieldsType
add_single_url = [](auto& header) {
- header.addField("url", UrlDataType::getInstance().getId()); };
+ header.addField("url", document::DataType::T_URI); };
DocBuilder::AddFieldsType
add_array_url = [](auto& header) {
using namespace document::config_builder;
- header.addField("url", Array(UrlDataType::getInstance().getId())); };
+ header.addField("url", Array(document::DataType::T_URI)); };
DocBuilder::AddFieldsType
add_wset_url = [](auto& header) {
using namespace document::config_builder;
- header.addField("url", Wset(UrlDataType::getInstance().getId())); };
+ header.addField("url", Wset(document::DataType::T_URI)); };
@@ -350,139 +299,6 @@ TEST_F(WeightedSetInverterTest, require_that_weighted_set_field_works)
_inserter_backend.toStr());
}
-TEST_F(SingleInverterTest, require_that_annotated_single_url_field_works)
-{
- enableAnnotations();
- invertDocument(10, *makeDoc10Single(_b));
- pushDocuments();
- EXPECT_EQ("f=0,"
- "w=2,a=10,"
- "w=4,a=10,"
- "w=81,a=10,"
- "w=ab,a=10,"
- "w=com,a=10,"
- "w=example,a=10,"
- "w=fluke,a=10,"
- "w=http,a=10,"
- "w=www,a=10,"
- "f=1,"
- "w=http,a=10,"
- "f=2,"
- "w=com,a=10,"
- "w=example,a=10,"
- "w=www,a=10,"
- "f=3,"
- "w=81,a=10,"
- "f=4,"
- "w=altfluke,a=10,"
- "w=fluke,a=10,"
- "f=5,"
- "w=2,a=10,"
- "w=ab,a=10,"
- "f=6,"
- "w=4,a=10,"
- "f=7,"
- "w=EnDhOsT,a=10,"
- "w=StArThOsT,a=10,"
- "w=com,a=10,"
- "w=example,a=10,"
- "w=www,a=10",
- _inserter_backend.toStr());
-}
-
-TEST_F(ArrayInverterTest, require_that_annotated_array_url_field_works)
-{
- enableAnnotations();
- invertDocument(10, *makeDoc10Array(_b));
- pushDocuments();
- EXPECT_EQ("f=0,"
- "w=2,a=10,"
- "w=8,a=10,"
- "w=82,a=10,"
- "w=9,a=10,"
- "w=ab,a=10,"
- "w=com,a=10,"
- "w=example,a=10,"
- "w=flickr,a=10,"
- "w=fluke,a=10,"
- "w=http,a=10,"
- "w=www,a=10,"
- "f=1,"
- "w=http,a=10,"
- "f=2,"
- "w=com,a=10,"
- "w=example,a=10,"
- "w=flickr,a=10,"
- "w=www,a=10,"
- "f=3,"
- "w=82,a=10,"
- "f=4,"
- "w=altfluke,a=10,"
- "w=fluke,a=10,"
- "f=5,"
- "w=2,a=10,"
- "w=ab,a=10,"
- "f=6,"
- "w=8,a=10,"
- "w=9,a=10,"
- "f=7,"
- "w=EnDhOsT,a=10,"
- "w=StArThOsT,a=10,"
- "w=com,a=10,"
- "w=example,a=10,"
- "w=flickr,a=10,"
- "w=www,a=10",
- _inserter_backend.toStr());
-}
-
-TEST_F(WeightedSetInverterTest, require_that_annotated_weighted_set_field_works)
-{
- enableAnnotations();
- _inserter_backend.setVerbose();
- invertDocument(10, *makeDoc10WeightedSet(_b));
- pushDocuments();
- EXPECT_EQ("f=0,"
- "w=12,a=10(e=0,w=4,l=9[8]),"
- "w=13,a=10(e=1,w=7,l=9[8]),"
- "w=2,a=10(e=0,w=4,l=9[7],e=1,w=7,l=9[7]),"
- "w=83,a=10(e=0,w=4,l=9[4]),"
- "w=85,a=10(e=1,w=7,l=9[4]),"
- "w=ab,a=10(e=0,w=4,l=9[6],e=1,w=7,l=9[6]),"
- "w=com,a=10(e=0,w=4,l=9[3],e=1,w=7,l=9[3]),"
- "w=example,a=10(e=0,w=4,l=9[2]),"
- "w=flickr,a=10(e=1,w=7,l=9[2]),"
- "w=fluke,a=10(e=0,w=4,l=9[5],e=1,w=7,l=9[5]),"
- "w=http,a=10(e=0,w=4,l=9[0],e=1,w=7,l=9[0]),"
- "w=www,a=10(e=0,w=4,l=9[1],e=1,w=7,l=9[1]),"
- "f=1,"
- "w=http,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
- "f=2,"
- "w=com,a=10(e=0,w=4,l=3[2],e=1,w=7,l=3[2]),"
- "w=example,a=10(e=0,w=4,l=3[1]),"
- "w=flickr,a=10(e=1,w=7,l=3[1]),"
- "w=www,a=10(e=0,w=4,l=3[0],e=1,w=7,l=3[0]),"
- "f=3,"
- "w=83,a=10(e=0,w=4,l=1[0]),"
- "w=85,a=10(e=1,w=7,l=1[0]),"
- "f=4,"
- "w=altfluke,a=10(e=0,w=4,l=1[0]),"
- "w=fluke,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
- "f=5,"
- "w=2,a=10(e=0,w=4,l=2[1],e=1,w=7,l=2[1]),"
- "w=ab,a=10(e=0,w=4,l=2[0],e=1,w=7,l=2[0]),"
- "f=6,"
- "w=12,a=10(e=0,w=4,l=1[0]),"
- "w=13,a=10(e=1,w=7,l=1[0]),"
- "f=7,"
- "w=EnDhOsT,a=10(e=0,w=4,l=5[4],e=1,w=7,l=5[4]),"
- "w=StArThOsT,a=10(e=0,w=4,l=5[0],e=1,w=7,l=5[0]),"
- "w=com,a=10(e=0,w=4,l=5[3],e=1,w=7,l=5[3]),"
- "w=example,a=10(e=0,w=4,l=5[2]),"
- "w=flickr,a=10(e=1,w=7,l=5[2]),"
- "w=www,a=10(e=0,w=4,l=5[1],e=1,w=7,l=5[1])",
- _inserter_backend.toStr());
-}
-
TEST_F(SingleInverterTest, require_that_empty_single_field_works)
{
invertDocument(10, *makeDoc10Empty(_b));
@@ -505,30 +321,6 @@ TEST_F(WeightedSetInverterTest, require_that_empty_weighted_set_field_works)
EXPECT_EQ("", _inserter_backend.toStr());
}
-TEST_F(SingleInverterTest, require_that_annotated_empty_single_field_works)
-{
- enableAnnotations();
- invertDocument(10, *makeDoc10Empty(_b));
- pushDocuments();
- EXPECT_EQ("", _inserter_backend.toStr());
-}
-
-TEST_F(ArrayInverterTest, require_that_annotated_empty_array_field_works)
-{
- enableAnnotations();
- invertDocument(10, *makeDoc10Empty(_b));
- pushDocuments();
- EXPECT_EQ("", _inserter_backend.toStr());
-}
-
-TEST_F(WeightedSetInverterTest, require_that_annotated_empty_weighted_set_field_works)
-{
- enableAnnotations();
- invertDocument(10, *makeDoc10Empty(_b));
- pushDocuments();
- EXPECT_EQ("", _inserter_backend.toStr());
-}
-
}
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
index 9eb629cf33b..c79d856676d 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
@@ -2,7 +2,6 @@
#include "url_field_inverter.h"
#include "field_inverter.h"
-#include <vespa/document/datatype/urldatatype.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
@@ -21,7 +20,6 @@ namespace {
static vespalib::string HOSTNAME_BEGIN("StArThOsT");
static vespalib::string HOSTNAME_END("EnDhOsT");
-const vespalib::string SPANTREE_NAME("linguistics");
static size_t
lowercaseToken(vespalib::string &dest, const char *src, size_t srcSize)
@@ -52,7 +50,6 @@ using document::IntFieldValue;
using document::SpanTree;
using document::StringFieldValue;
using document::StructFieldValue;
-using document::UrlDataType;
using document::WeightedSetFieldValue;
using search::index::Schema;
using search::index::schema::CollectionType;
@@ -112,90 +109,13 @@ UrlFieldInverter::endElement()
}
void
-UrlFieldInverter::processUrlSubField(FieldInverter *inverter,
- const StructFieldValue &field,
- vespalib::stringref subField,
- bool addAnchors)
-{
- const FieldValue::UP sfv = field.getValue(subField);
- if (!sfv) {
- return;
- }
- if (!sfv->isA(FieldValue::Type::STRING)) {
- LOG(error,
- "Illegal field type %s for URL subfield %s, expected string",
- sfv->getDataType()->getName().c_str(),
- vespalib::string(subField).data());
- return;
- }
- const auto &value = static_cast<const StringFieldValue &>(*sfv);
- if (addAnchors) {
- inverter->addWord(HOSTNAME_BEGIN);
- }
- inverter->processAnnotations(value);
- if (addAnchors) {
- inverter->addWord(HOSTNAME_END);
- }
-}
-
-void
-UrlFieldInverter::processAnnotatedUrlField(const StructFieldValue & field)
-{
- processUrlSubField(_all, field, UrlDataType::FIELD_ALL, false);
- processUrlSubField(_scheme, field, UrlDataType::FIELD_SCHEME, false);
- processUrlSubField(_host, field, UrlDataType::FIELD_HOST, false);
- processUrlSubField(_port, field, UrlDataType::FIELD_PORT, false);
- processUrlSubField(_path, field, UrlDataType::FIELD_PATH, false);
- processUrlSubField(_query, field, UrlDataType::FIELD_QUERY, false);
- processUrlSubField(_fragment, field, UrlDataType::FIELD_FRAGMENT, false);
- processUrlSubField(_hostname, field, UrlDataType::FIELD_HOST, true);
-}
-
-void
UrlFieldInverter::processUrlField(const FieldValue &url_field)
{
- if (url_field.isA(FieldValue::Type::STRING)) {
- const vespalib::string &url_str =
- static_cast<const StringFieldValue &>(url_field).getValue();
- processUrlOldStyle(url_str);
- return;
- }
- assert(url_field.isA(FieldValue::Type::STRUCT));
- const auto &field = static_cast<const StructFieldValue &>(url_field);
-
- const FieldValue::UP all_val = field.getValue("all");
- if (all_val.get() == nullptr) {
- if (_useAnnotations) {
- // New style, use annotations
- processAnnotatedUrlField(field);
- }
- return;
- }
-
- if (!all_val->isA(FieldValue::Type::STRING)) {
- LOG(error,
- "Illegal field type %s for URL subfield all, expected string",
- all_val->getDataType()->getName().c_str());
- return;
- }
- const auto &all_sfv = static_cast<const StringFieldValue &>(*all_val);
- if (_useAnnotations) {
- StringFieldValue::SpanTrees trees = all_sfv.getSpanTrees();
- const SpanTree *tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
- if (tree != nullptr) {
- // New style, use annotations
- processAnnotatedUrlField(field);
- return;
- }
- }
-
- if (_useAnnotations) {
- return;
- }
-
- // Old style, tokenize in backend
- const vespalib::string &s = all_sfv.getValue();
- processUrlOldStyle(s);
+ assert(url_field.isA(FieldValue::Type::STRING));
+ const vespalib::string &url_str =
+ static_cast<const StringFieldValue &>(url_field).getValue();
+ processUrlOldStyle(url_str);
+ return;
}
void
@@ -287,9 +207,7 @@ namespace {
bool
isUriType(const DataType &type)
{
- return type == UrlDataType::getInstance()
- || type == *DataType::STRING
- || type == *DataType::URI;
+ return type == *DataType::STRING || type == *DataType::URI;
}
}
@@ -304,7 +222,7 @@ UrlFieldInverter::invertUrlField(const FieldValue &val)
processUrlField(val);
endElement();
} else {
- throw std::runtime_error(make_string("Expected URI struct, got '%s'", val.getDataType()->getName().c_str()));
+ throw std::runtime_error(make_string("Expected URI field, got '%s'", val.getDataType()->getName().c_str()));
}
break;
case CollectionType::WEIGHTEDSET: {
@@ -403,10 +321,8 @@ UrlFieldInverter::UrlFieldInverter(index::schema::CollectionType collectionType,
_query(query),
_fragment(fragment),
_hostname(hostname),
- _useAnnotations(false),
_collectionType(collectionType)
{
}
}
-
diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h
index 2dbe3c48959..0a38985dac4 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h
@@ -19,7 +19,6 @@ class UrlFieldInverter {
FieldInverter *_fragment;
FieldInverter *_hostname;
- bool _useAnnotations;
index::schema::CollectionType _collectionType;
void startDoc(uint32_t docId);
@@ -30,13 +29,6 @@ class UrlFieldInverter {
void endElement();
- void processUrlSubField(FieldInverter *inverter,
- const document::StructFieldValue &field,
- vespalib::stringref subField,
- bool addAnchors);
-
- void processAnnotatedUrlField(const document::StructFieldValue &field);
-
void processUrlField(const document::FieldValue &url_field);
void processUrlOldStyle(const vespalib::string &s);
@@ -60,9 +52,6 @@ public:
void invertField(uint32_t docId, const document::FieldValue::UP &field);
void removeDocument(uint32_t docId);
- void setUseAnnotations(bool useAnnotations) {
- _useAnnotations = useAnnotations;
- }
void applyRemoves();
void pushDocuments();
};
diff --git a/searchlib/src/vespa/searchlib/test/doc_builder.cpp b/searchlib/src/vespa/searchlib/test/doc_builder.cpp
index 2312bf1d6bf..4ed64b9bfe6 100644
--- a/searchlib/src/vespa/searchlib/test/doc_builder.cpp
+++ b/searchlib/src/vespa/searchlib/test/doc_builder.cpp
@@ -108,10 +108,4 @@ DocBuilder::make_struct(vespalib::stringref field_name)
return {field_type};
}
-StructFieldValue
-DocBuilder::make_url()
-{
- return {get_data_type("url")};
-}
-
}
diff --git a/searchlib/src/vespa/searchlib/test/doc_builder.h b/searchlib/src/vespa/searchlib/test/doc_builder.h
index 75dbc30a0fb..1f652694b0a 100644
--- a/searchlib/src/vespa/searchlib/test/doc_builder.h
+++ b/searchlib/src/vespa/searchlib/test/doc_builder.h
@@ -45,7 +45,6 @@ public:
document::MapFieldValue make_map(vespalib::stringref field_name);
document::WeightedSetFieldValue make_wset(vespalib::stringref field_name);
document::StructFieldValue make_struct(vespalib::stringref field_name);
- document::StructFieldValue make_url();
};
}
diff --git a/searchlib/src/vespa/searchlib/test/string_field_builder.cpp b/searchlib/src/vespa/searchlib/test/string_field_builder.cpp
index 1510a306875..4d5637ee533 100644
--- a/searchlib/src/vespa/searchlib/test/string_field_builder.cpp
+++ b/searchlib/src/vespa/searchlib/test/string_field_builder.cpp
@@ -37,7 +37,6 @@ StringFieldBuilder::StringFieldBuilder(const DocBuilder& doc_builder)
_span_list(nullptr),
_span_tree(),
_last_span(nullptr),
- _url_mode(false),
_repo(doc_builder.get_repo(), doc_builder.get_document_type())
{
}
@@ -100,8 +99,7 @@ StringFieldBuilder::tokenize(const vespalib::string& val)
while (reader.hasMore()) {
c = reader.getChar();
- bool new_word = Fast_UnicodeUtil::IsWordChar(c) ||
- (_url_mode && (c == '-' || c == '_'));
+ bool new_word = Fast_UnicodeUtil::IsWordChar(c);
if (old_word != new_word) {
if (!token_buffer.empty()) {
token(token_buffer, old_word);
diff --git a/searchlib/src/vespa/searchlib/test/string_field_builder.h b/searchlib/src/vespa/searchlib/test/string_field_builder.h
index 94c2bfc2fe8..13a3b16db85 100644
--- a/searchlib/src/vespa/searchlib/test/string_field_builder.h
+++ b/searchlib/src/vespa/searchlib/test/string_field_builder.h
@@ -26,14 +26,12 @@ class StringFieldBuilder {
document::SpanList* _span_list; // owned by _span_tree
std::unique_ptr<document::SpanTree> _span_tree;
const document::SpanNode* _last_span;
- bool _url_mode;
const document::FixedTypeRepo _repo;
void start_annotate();
void add_span();
public:
StringFieldBuilder(const DocBuilder& doc_builder);
~StringFieldBuilder();
- StringFieldBuilder& url_mode(bool url_mode_) noexcept { _url_mode = url_mode_; return *this; }
StringFieldBuilder& token(const vespalib::string& val, bool is_word);
StringFieldBuilder& word(const vespalib::string& val) { return token(val, true); }
StringFieldBuilder& space() { return token(" ", false); }