diff options
Diffstat (limited to 'searchlib/src/tests/memoryindex/urlfieldinverter')
5 files changed, 591 insertions, 0 deletions
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore new file mode 100644 index 00000000000..b2636fe5e81 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore @@ -0,0 +1 @@ +searchlib_urlfieldinverter_test_app diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt new file mode 100644 index 00000000000..c5a0374fad9 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_urlfieldinverter_test_app + SOURCES + urlfieldinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_urlfieldinverter_test_app COMMAND searchlib_urlfieldinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/DESC b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC new file mode 100644 index 00000000000..00115ada607 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC @@ -0,0 +1 @@ +UrlField inverter test. Take a look at urlfieldinverter_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/FILES b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES new file mode 100644 index 00000000000..ac08b0a3e90 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES @@ -0,0 +1 @@ +urlfieldinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp new file mode 100644 index 00000000000..30b5883f153 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp @@ -0,0 +1,579 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("urlfieldinverter_test"); +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/searchlib/memoryindex/urlfieldinverter.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/document/repo/fixedtyperepo.h> + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::DocTypeBuilder; +using index::Schema; + +namespace memoryindex +{ + +namespace { +const vespalib::string url = "url"; +} + + +namespace +{ + +Document::UP +makeDoc10Single(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc10Array(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(); + return b.endDocument(); +} + +Document::UP +makeDoc10WeightedSet(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc10Empty(DocBuilder &b) +{ + b.startDocument("doc::10"); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + std::vector<std::unique_ptr<FieldInverter> > _inverters; + std::unique_ptr<UrlFieldInverter> _urlInverter; + test::OrderedDocumentInserter _inserter; + DocTypeBuilder::SchemaIndexFields _schemaIndexFields; + + static Schema + makeSchema(Schema::CollectionType collectionType) + { + Schema schema; + schema.addUriIndexFields(Schema::IndexField("url", Schema::STRING, + collectionType)); + return schema; + } + + Fixture(Schema::CollectionType collectionType) + : _schema(makeSchema(collectionType)), + _b(_schema), + _inverters(), + _urlInverter(), + _inserter(), + _schemaIndexFields() + { + _schemaIndexFields.setup(_schema); + for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); + ++fieldId) { + _inverters.push_back(std::make_unique<FieldInverter>(_schema, + fieldId)); + } + DocTypeBuilder::UriField &urlField = + _schemaIndexFields._uriFields.front(); + _urlInverter = std::make_unique<UrlFieldInverter> + (collectionType, + _inverters[urlField._all].get(), + _inverters[urlField._scheme].get(), + _inverters[urlField._host].get(), + _inverters[urlField._port].get(), + _inverters[urlField._path].get(), + _inverters[urlField._query].get(), + _inverters[urlField._fragment].get(), + _inverters[urlField._hostname].get()); + } + + void + invertDocument(uint32_t docId, const Document &doc) + { + _urlInverter->invertField(docId, doc.getValue(url)); + } + + void + pushDocuments() + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + _inserter.setFieldId(fieldId); + inverter->pushDocuments(_inserter); + ++fieldId; + } + } + + void + enableAnnotations() + { + _urlInverter->setUseAnnotations(true); + } +}; + + +TEST_F("requireThatSingleUrlFieldWorks", Fixture(Schema::SINGLE)) +{ + f.invertDocument(10, *makeDoc10Single(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=4,a=10," + "w=81,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=81,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=4,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatArrayUrlFieldWorks", Fixture(Schema::ARRAY)) +{ + f.invertDocument(10, *makeDoc10Array(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=8,a=10," + "w=82,a=10," + "w=9,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=82,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=8,a=10," + "w=9,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET)) +{ + f.invertDocument(10, *makeDoc10WeightedSet(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=12,a=10," + "w=13,a=10," + "w=2,a=10," + "w=83,a=10," + "w=85,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=83,a=10," + "w=85,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=12,a=10," + "w=13,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedSingleUrlFieldWorks", Fixture(Schema::SINGLE)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Single(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=4,a=10," + "w=81,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=81,a=10," + "f=4," + "w=altfluke,a=10," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=4,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAnnotatedArrayUrlFieldWorks", Fixture(Schema::ARRAY)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Array(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=8,a=10," + "w=82,a=10," + "w=9,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=82,a=10," + "f=4," + "w=altfluke,a=10," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=8,a=10," + "w=9,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedWeightedSetFieldWorks", + Fixture(Schema::WEIGHTEDSET)) +{ + f.enableAnnotations(); + f._inserter.setVerbose(); + f.invertDocument(10, *makeDoc10WeightedSet(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=12,a=10(e=0,w=4,l=9[8])," + "w=13,a=10(e=1,w=7,l=9[8])," + "w=2,a=10(e=0,w=4,l=9[7],e=1,w=7,l=9[7])," + "w=83,a=10(e=0,w=4,l=9[4])," + "w=85,a=10(e=1,w=7,l=9[4])," + "w=ab,a=10(e=0,w=4,l=9[6],e=1,w=7,l=9[6])," + "w=com,a=10(e=0,w=4,l=9[3],e=1,w=7,l=9[3])," + "w=flickr,a=10(e=1,w=7,l=9[2])," + "w=fluke,a=10(e=0,w=4,l=9[5],e=1,w=7,l=9[5])," + "w=http,a=10(e=0,w=4,l=9[0],e=1,w=7,l=9[0])," + "w=www,a=10(e=0,w=4,l=9[1],e=1,w=7,l=9[1])," + "w=yahoo,a=10(e=0,w=4,l=9[2])," + "f=1," + "w=http,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0])," + "f=2," + "w=com,a=10(e=0,w=4,l=3[2],e=1,w=7,l=3[2])," + "w=flickr,a=10(e=1,w=7,l=3[1])," + "w=www,a=10(e=0,w=4,l=3[0],e=1,w=7,l=3[0])," + "w=yahoo,a=10(e=0,w=4,l=3[1])," + "f=3," + "w=83,a=10(e=0,w=4,l=1[0])," + "w=85,a=10(e=1,w=7,l=1[0])," + "f=4," + "w=altfluke,a=10(e=0,w=4,l=1[0])," + "w=fluke,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0])," + "f=5," + "w=2,a=10(e=0,w=4,l=2[1],e=1,w=7,l=2[1])," + "w=ab,a=10(e=0,w=4,l=2[0],e=1,w=7,l=2[0])," + "f=6," + "w=12,a=10(e=0,w=4,l=1[0])," + "w=13,a=10(e=1,w=7,l=1[0])," + "f=7," + "w=EnDhOsT,a=10(e=0,w=4,l=5[4],e=1,w=7,l=5[4])," + "w=StArThOsT,a=10(e=0,w=4,l=5[0],e=1,w=7,l=5[0])," + "w=com,a=10(e=0,w=4,l=5[3],e=1,w=7,l=5[3])," + "w=flickr,a=10(e=1,w=7,l=5[2])," + "w=www,a=10(e=0,w=4,l=5[1],e=1,w=7,l=5[1])," + "w=yahoo,a=10(e=0,w=4,l=5[2])", + f._inserter.toStr()); +} + + +TEST_F("requireThatEmptySingleFieldWorks", Fixture(Schema::SINGLE)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatEmptyArrayFieldWorks", Fixture(Schema::ARRAY)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatEmptyWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptySingleFieldWorks", Fixture(Schema::SINGLE)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptyArrayFieldWorks", Fixture(Schema::ARRAY)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptyWeightedSetFieldWorks", + Fixture(Schema::WEIGHTEDSET)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } |