diff options
Diffstat (limited to 'searchlib/src/tests/memoryindex')
5 files changed, 402 insertions, 463 deletions
diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp index 3f8a04d9460..83746b611fb 100644 --- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp @@ -1,8 +1,13 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/searchlib/index/docbuilder.h> -#include <vespa/searchlib/index/field_length_calculator.h> #include <vespa/searchlib/memoryindex/document_inverter.h> +#include <vespa/document/datatype/datatype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> +#include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> @@ -19,9 +24,10 @@ namespace search::memoryindex { using document::Document; -using index::DocBuilder; +using index::EmptyDocBuilder; using index::FieldLengthCalculator; using index::Schema; +using index::StringFieldBuilder; using index::schema::CollectionType; using index::schema::DataType; using vespalib::SequencedTaskExecutor; @@ -29,64 +35,68 @@ using vespalib::ISequencedTaskExecutor; namespace { +EmptyDocBuilder::AddFieldsType +make_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + Document::UP -makeDoc10(DocBuilder &b) +makeDoc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); + return doc; } Document::UP -makeDoc11(DocBuilder &b) +makeDoc11(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::11"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("e").addStr("f"). - endField(); - b.startIndexField("f1"). - addStr("a").addStr("g"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::11"); + doc->setValue("f0", sfb.tokenize("a b e f").build()); + doc->setValue("f1", sfb.tokenize("a g").build()); + return doc; } Document::UP -makeDoc12(DocBuilder &b) +makeDoc12(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::12"); - b.startIndexField("f0"). - addStr("h").addStr("doc12"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::12"); + doc->setValue("f0", sfb.tokenize("h doc12").build()); + return doc; } Document::UP -makeDoc13(DocBuilder &b) +makeDoc13(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::13"); - b.startIndexField("f0"). - addStr("i").addStr("doc13"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::13"); + doc->setValue("f0", sfb.tokenize("i doc13").build()); + return doc; } Document::UP -makeDoc14(DocBuilder &b) +makeDoc14(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::14"); - b.startIndexField("f0"). - addStr("j").addStr("doc14"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::14"); + doc->setValue("f0", sfb.tokenize("j doc14").build()); + return doc; } Document::UP -makeDoc15(DocBuilder &b) +makeDoc15(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::15"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::15"); } } @@ -96,7 +106,7 @@ VESPA_THREAD_STACK_TAG(push_executor) struct DocumentInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; WordStore _word_store; @@ -118,7 +128,7 @@ struct DocumentInverterTest : public ::testing::Test { DocumentInverterTest() : _schema(makeSchema()), - _b(_schema), + _b(make_add_fields()), _invertThreads(SequencedTaskExecutor::create(invert_executor, 1)), _pushThreads(SequencedTaskExecutor::create(push_executor, 1)), _word_store(), diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index dcca1f136f6..04d1f08db6f 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -1,13 +1,22 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/document/datatype/datatype.h> +#include <vespa/document/datatype/urldatatype.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/diskindex/zcposoccrandread.h> #include <vespa/searchlib/fef/fieldpositionsiterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/docidandfeatures.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/document_inverter.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> #include <vespa/searchlib/memoryindex/field_index_collection.h> @@ -37,7 +46,11 @@ namespace search { using namespace fef; using namespace index; +using document::ArrayFieldValue; using document::Document; +using document::StructFieldValue; +using document::UrlDataType; +using document::WeightedSetFieldValue; using queryeval::RankedSearchIteratorBase; using queryeval::SearchIterator; using search::index::schema::CollectionType; @@ -505,6 +518,12 @@ make_single_field_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_single_add_fields() +{ + return [](auto& header) { header.addField("f0", document::DataType::T_STRING); }; +} + template <typename FieldIndexType> struct FieldIndexTest : public ::testing::Test { Schema schema; @@ -706,6 +725,18 @@ make_multi_field_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_multi_field_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + struct FieldIndexCollectionTest : public ::testing::Test { Schema schema; FieldIndexCollection fic; @@ -907,16 +938,16 @@ class InverterTest : public ::testing::Test { public: Schema _schema; FieldIndexCollection _fic; - DocBuilder _b; + EmptyDocBuilder _b; std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; DocumentInverterContext _inv_context; DocumentInverter _inv; - InverterTest(const Schema& schema) + InverterTest(const Schema& schema, EmptyDocBuilder::AddFieldsType add_fields) : _schema(schema), _fic(_schema, MockFieldLengthInspector()), - _b(_schema), + _b(add_fields), _invertThreads(SequencedTaskExecutor::create(invert_executor, 2)), _pushThreads(SequencedTaskExecutor::create(push_executor, 2)), _inv_context(_schema, *_invertThreads, *_pushThreads, _fic), @@ -938,91 +969,63 @@ public: class BasicInverterTest : public InverterTest { public: - BasicInverterTest() : InverterTest(make_multi_field_schema()) {} + BasicInverterTest() : InverterTest(make_multi_field_schema(), make_multi_field_add_fields()) {} }; TEST_F(BasicInverterTest, require_that_inversion_is_working) { Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::20"); - _b.startIndexField("f0"). - addStr("a").addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::20"); + doc->setValue("f0", sfb.tokenize("a a b c d").build()); _inv.invertDocument(20, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::30"); - _b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - addStr("e").addStr("f"). - endField(); - _b.startIndexField("f1"). - addStr("\nw2").addStr("w").addStr("x"). - addStr("\nw3").addStr("y").addStr("z"). - endField(); - _b.startIndexField("f2"). - startElement(4). - addStr("w").addStr("x"). - endElement(). - startElement(5). - addStr("y").addStr("z"). - endElement(). - endField(); - _b.startIndexField("f3"). - startElement(6). - addStr("w").addStr("x"). - endElement(). - startElement(7). - addStr("y").addStr("z"). - endElement(). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::30"); + doc->setValue("f0", sfb.tokenize("a b c d e f").build()); + doc->setValue("f1", sfb.word("\nw2").tokenize(" w x "). + word("\nw3").tokenize(" y z").build()); + { + ArrayFieldValue string_array(_b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("w x").build()); + string_array.add(sfb.tokenize("y z").build()); + doc->setValue("f2", string_array); + } + { + WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("w x").build(), 6); + string_wset.add(sfb.tokenize("y z").build(), 7); + doc->setValue("f3", string_wset); + } _inv.invertDocument(30, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::40"); - _b.startIndexField("f0"). - addStr("a").addStr("a").addStr("b").addStr("c").addStr("a"). - addStr("e").addStr("f"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::40"); + doc->setValue("f0", sfb.tokenize("a a b c a e f").build()); _inv.invertDocument(40, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::999"); - _b.startIndexField("f0"). - addStr("this").addStr("is").addStr("_a_").addStr("test"). - addStr("for").addStr("insertion").addStr("speed").addStr("with"). - addStr("more").addStr("than").addStr("just").addStr("__a__"). - addStr("few").addStr("words").addStr("present").addStr("in"). - addStr("some").addStr("of").addStr("the").addStr("fields"). - endField(); - _b.startIndexField("f1"). - addStr("the").addStr("other").addStr("field").addStr("also"). - addStr("has").addStr("some").addStr("content"). - endField(); - _b.startIndexField("f2"). - startElement(1). - addStr("strange").addStr("things").addStr("here"). - addStr("has").addStr("some").addStr("content"). - endElement(). - endField(); - _b.startIndexField("f3"). - startElement(3). - addStr("not").addStr("a").addStr("weighty").addStr("argument"). - endElement(). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::999"); + doc->setValue("f0", sfb.tokenize("this is ").word("_a_"). + tokenize(" test for insertion speed with more than just "). + word("__a__").tokenize(" few words present in some of the fields").build()); + doc->setValue("f1", sfb.tokenize("the other field also has some content").build()); + { + ArrayFieldValue string_array(_b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("strange things here has some content").build()); + doc->setValue("f2", string_array); + } + { + WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("not a weighty argument").build(), 3); + doc->setValue("f3", string_wset); + } for (uint32_t docId = 10000; docId < 20000; ++docId) { _inv.invertDocument(docId, *doc, {}); myPushDocument(_inv); @@ -1132,19 +1135,17 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remover) { - Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::1"); - _b.startIndexField("f0").addStr("a").addStr("b").endField(); - _b.startIndexField("f1").addStr("a").addStr("c").endField(); - Document::UP doc1 = _b.endDocument(); - _inv.invertDocument(1, *doc1.get(), {}); + auto doc1 = _b.make_document("id:ns:searchdocument::1"); + doc1->setValue("f0", sfb.tokenize("a b").build()); + doc1->setValue("f1", sfb.tokenize("a c").build()); + _inv.invertDocument(1, *doc1, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::2"); - _b.startIndexField("f0").addStr("b").addStr("c").endField(); - Document::UP doc2 = _b.endDocument(); - _inv.invertDocument(2, *doc2.get(), {}); + auto doc2 = _b.make_document("id:ns:searchdocument::2"); + doc2->setValue("f0", sfb.tokenize("b c").build()); + _inv.invertDocument(2, *doc2, {}); myPushDocument(_inv); EXPECT_TRUE(assertPostingList("[1]", find("a", 0))); @@ -1172,136 +1173,71 @@ make_uri_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_uri_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + header.addField("iu", UrlDataType::getInstance().getId()) + .addField("iau", Array(UrlDataType::getInstance().getId())) + .addField("iwu", Wset(UrlDataType::getInstance().getId())); + }; +} + class UriInverterTest : public InverterTest { public: - UriInverterTest() : InverterTest(make_uri_schema()) {} + UriInverterTest() : InverterTest(make_uri_schema(), make_uri_add_fields()) {} }; TEST_F(UriInverterTest, require_that_uri_indexing_is_working) { Document::UP doc; - - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("iu"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - _b.startIndexField("iau"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - _b.startIndexField("iwu"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - doc = _b.endDocument(); + StringFieldBuilder sfb(_b); + sfb.url_mode(true); + StructFieldValue url_value(_b.get_data_type("url")); + + doc = _b.make_document("id:ns:searchdocument::10"); + url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("81").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("4").build()); + doc->setValue("iu", url_value); + ArrayFieldValue url_array(_b.get_data_type("Array<url>")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("82").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("8").build()); + url_array.add(url_value); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("fragment", sfb.tokenize("9").build()); + url_array.add(url_value); + doc->setValue("iau", url_array); + WeightedSetFieldValue url_wset(_b.get_data_type("WeightedSet<url>")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("83").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("12").build()); + url_wset.add(url_value, 4); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("port", sfb.tokenize("85").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("13").build()); + url_wset.add(url_value, 7); + doc->setValue("iwu", url_wset); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); @@ -1360,21 +1296,16 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) class CjkInverterTest : public InverterTest { public: - CjkInverterTest() : InverterTest(make_single_field_schema()) {} + CjkInverterTest() : InverterTest(make_single_field_schema(), make_single_add_fields()) {} }; TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) { Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("f0"). - addStr("我就是那个"). - setAutoSpace(false). - addStr("大灰狼"). - setAutoSpace(true). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.word("我就是那个").word("大灰狼").build()); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp index ed049a82c42..bf3a911a579 100644 --- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp @@ -1,8 +1,14 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> #include <vespa/searchlib/memoryindex/word_store.h> @@ -13,9 +19,12 @@ namespace search { +using document::ArrayFieldValue; using document::Document; -using index::DocBuilder; +using document::WeightedSetFieldValue; +using index::EmptyDocBuilder; using index::Schema; +using index::StringFieldBuilder; using index::schema::CollectionType; using index::schema::DataType; @@ -26,93 +35,91 @@ namespace memoryindex { namespace { Document::UP -makeDoc10(DocBuilder &b) +makeDoc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); + return doc; } Document::UP -makeDoc11(DocBuilder &b) +makeDoc11(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::11"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("e").addStr("f"). - endField(); - b.startIndexField("f1"). - addStr("a").addStr("g"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::11"); + doc->setValue("f0", sfb.tokenize("a b e f").build()); + doc->setValue("f1", sfb.tokenize("a g").build()); + return doc; } Document::UP -makeDoc12(DocBuilder &b) +makeDoc12(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::12"); - b.startIndexField("f0"). - addStr("h").addStr("doc12"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::12"); + doc->setValue("f0", sfb.tokenize("h doc12").build()); + return doc; } Document::UP -makeDoc13(DocBuilder &b) +makeDoc13(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::13"); - b.startIndexField("f0"). - addStr("i").addStr("doc13"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::13"); + doc->setValue("f0", sfb.tokenize("i doc13").build()); + return doc; } Document::UP -makeDoc14(DocBuilder &b) +makeDoc14(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::14"); - b.startIndexField("f0"). - addStr("j").addStr("doc14"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::14"); + doc->setValue("f0", sfb.tokenize("j doc14").build()); + return doc; } Document::UP -makeDoc15(DocBuilder &b) +makeDoc15(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::15"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::15"); } Document::UP -makeDoc16(DocBuilder &b) +makeDoc16(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::16"); - b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz"). - addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty"). - addStr("z").endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::16"); + doc->setValue("f0", sfb.tokenize("foo bar baz").alt_word("altbaz").tokenize(" y").alt_word("alty").tokenize(" z").build()); + return doc; } Document::UP -makeDoc17(DocBuilder &b) +makeDoc17(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::17"); - b.startIndexField("f1").addStr("foo0").addStr("bar0").endField(); - b.startIndexField("f2").startElement(1).addStr("foo").addStr("bar").endElement().startElement(1).addStr("bar").endElement().endField(); - b.startIndexField("f3").startElement(3).addStr("foo2").addStr("bar2").endElement().startElement(4).addStr("bar2").endElement().endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::17"); + doc->setValue("f1", sfb.tokenize("foo0 bar0").build()); + ArrayFieldValue string_array(b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("foo bar").build()); + string_array.add(sfb.tokenize("bar").build()); + doc->setValue("f2", string_array); + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("foo2 bar2").build(), 3); + string_wset.add(sfb.tokenize("bar2").build(), 4); + doc->setValue("f3", string_wset); + return doc; } vespalib::string corruptWord = "corruptWord"; Document::UP -makeCorruptDocument(DocBuilder &b, size_t wordOffset) +makeCorruptDocument(EmptyDocBuilder &b, size_t wordOffset) { - b.startDocument("id:ns:searchdocument::18"); - b.startIndexField("f0").addStr("before").addStr(corruptWord).addStr("after").addStr("z").endField(); - auto doc = b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::18"); + doc->setValue("f0", sfb.tokenize("before ").word(corruptWord).tokenize(" after z").build()); vespalib::nbostream stream; doc->serialize(stream); std::vector<char> raw; @@ -127,14 +134,14 @@ makeCorruptDocument(DocBuilder &b, size_t wordOffset) } vespalib::nbostream badstream; badstream.write(&raw[0], raw.size()); - return std::make_unique<Document>(*b.getDocumentTypeRepo(), badstream); + return std::make_unique<Document>(b.get_repo(), badstream); } } struct FieldInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; WordStore _word_store; FieldIndexRemover _remover; test::OrderedFieldIndexInserterBackend _inserter_backend; @@ -151,9 +158,21 @@ struct FieldInverterTest : public ::testing::Test { return schema; } + static EmptyDocBuilder::AddFieldsType + make_add_fields() + { + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; + } + FieldInverterTest() : _schema(makeSchema()), - _b(_schema), + _b(make_add_fields()), _word_store(), _remover(_word_store), _inserter_backend(), diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp index b3ea948dfa7..1730e34adb5 100644 --- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp +++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp @@ -1,11 +1,15 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/common/scheduletaskcallback.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/matchdatalayout.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/i_field_length_inspector.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/memory_index.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> @@ -59,6 +63,12 @@ struct MySetup : public IFieldLengthInspector { } return FieldLengthInfo(); } + void add_fields(document::config_builder::Struct& header) const { + for (uint32_t i = 0; i < schema.getNumIndexFields(); ++i) { + auto& field = schema.getIndexField(i); + header.addField(field.getName(), document::DataType::T_STRING); + } + } }; @@ -70,31 +80,38 @@ struct Index { std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; MemoryIndex index; - DocBuilder builder; + EmptyDocBuilder builder; + StringFieldBuilder sfb; + std::unique_ptr<Document> builder_doc; uint32_t docid; std::string currentField; + bool add_space; Index(const MySetup &setup); ~Index(); void closeField() { if (!currentField.empty()) { - builder.endField(); + builder_doc->setValue(currentField, sfb.build()); currentField.clear(); } } Index &doc(uint32_t id) { docid = id; - builder.startDocument(vespalib::make_string("id:ns:searchdocument::%u", id)); + builder_doc = builder.make_document(vespalib::make_string("id:ns:searchdocument::%u", id)); return *this; } Index &field(const std::string &name) { closeField(); - builder.startIndexField(name); currentField = name; + add_space = false; return *this; } Index &add(const std::string &token) { - builder.addStr(token); + if (add_space) { + sfb.space(); + } + add_space = true; + sfb.word(token); return *this; } void internalSyncCommit() { @@ -106,7 +123,7 @@ struct Index { } Document::UP commit() { closeField(); - Document::UP d = builder.endDocument(); + Document::UP d = std::move(builder_doc); index.insertDocument(docid, *d, {}); internalSyncCommit(); return d; @@ -133,9 +150,12 @@ Index::Index(const MySetup &setup) _invertThreads(SequencedTaskExecutor::create(invert_executor, 2)), _pushThreads(SequencedTaskExecutor::create(push_executor, 2)), index(schema, setup, *_invertThreads, *_pushThreads), - builder(schema), + builder([&setup](auto& header) { setup.add_fields(header); }), + sfb(builder), + builder_doc(), docid(1), - currentField() + currentField(), + add_space(false) { } Index::~Index() = default; diff --git a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp index 969f483eef6..3995f06628c 100644 --- a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp @@ -1,11 +1,21 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/memoryindex/url_field_inverter.h> +#include <vespa/document/datatype/urldatatype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/schema_index_fields.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> -#include <vespa/searchlib/memoryindex/url_field_inverter.h> #include <vespa/searchlib/memoryindex/word_store.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter_backend.h> @@ -14,6 +24,10 @@ namespace search { using document::Document; +using document::ArrayFieldValue; +using document::StructFieldValue; +using document::UrlDataType; +using document::WeightedSetFieldValue; using index::schema::CollectionType; using index::schema::DataType; @@ -26,160 +40,88 @@ namespace { const vespalib::string url = "url"; Document::UP -makeDoc10Single(DocBuilder &b) +makeDoc10Single(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StructFieldValue url_value(b.get_data_type("url")); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("81").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("4").build()); + doc->setValue("url", url_value); + return doc; } Document::UP -makeDoc10Array(DocBuilder &b) +makeDoc10Array(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + ArrayFieldValue url_array(b.get_data_type("Array<url>")); + StructFieldValue url_value(b.get_data_type("url")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("82").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("8").build()); + url_array.add(url_value); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("fragment", sfb.tokenize("9").build()); + url_array.add(url_value); + doc->setValue("url", url_array); + return doc; } Document::UP -makeDoc10WeightedSet(DocBuilder &b) +makeDoc10WeightedSet(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + WeightedSetFieldValue url_wset(b.get_data_type("WeightedSet<url>")); + StructFieldValue url_value(b.get_data_type("url")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("83").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("12").build()); + url_wset.add(url_value, 4); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("port", sfb.tokenize("85").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("13").build()); + url_wset.add(url_value, 7); + doc->setValue("url", url_wset); + return doc; } Document::UP -makeDoc10Empty(DocBuilder &b) +makeDoc10Empty(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::10"); } } struct UrlFieldInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; WordStore _word_store; FieldIndexRemover _remover; test::OrderedFieldIndexInserterBackend _inserter_backend; @@ -195,9 +137,10 @@ struct UrlFieldInverterTest : public ::testing::Test { return schema; } - UrlFieldInverterTest(Schema::CollectionType collectionType) + UrlFieldInverterTest(Schema::CollectionType collectionType, + EmptyDocBuilder::AddFieldsType add_fields) : _schema(makeSchema(collectionType)), - _b(_schema), + _b(add_fields), _word_store(), _remover(_word_store), _inserter_backend(), @@ -250,16 +193,32 @@ struct UrlFieldInverterTest : public ::testing::Test { UrlFieldInverterTest::~UrlFieldInverterTest() = default; +EmptyDocBuilder::AddFieldsType +add_single_url = [](auto& header) { + header.addField("url", UrlDataType::getInstance().getId()); }; + +EmptyDocBuilder::AddFieldsType +add_array_url = [](auto& header) { + using namespace document::config_builder; + header.addField("url", Array(UrlDataType::getInstance().getId())); }; + +EmptyDocBuilder::AddFieldsType +add_wset_url = [](auto& header) { + using namespace document::config_builder; + header.addField("url", Wset(UrlDataType::getInstance().getId())); }; + + + struct SingleInverterTest : public UrlFieldInverterTest { - SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE) {} + SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE, add_single_url) {} }; struct ArrayInverterTest : public UrlFieldInverterTest { - ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY) {} + ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY, add_array_url) {} }; struct WeightedSetInverterTest : public UrlFieldInverterTest { - WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET) {} + WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET, add_wset_url) {} }; |