summaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/memoryindex/urlfieldinverter
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/memoryindex/urlfieldinverter')
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore1
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp579
5 files changed, 591 insertions, 0 deletions
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore
new file mode 100644
index 00000000000..b2636fe5e81
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore
@@ -0,0 +1 @@
+searchlib_urlfieldinverter_test_app
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt
new file mode 100644
index 00000000000..c5a0374fad9
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable for the URL field inverter, plus its test registration.
+vespa_add_executable(searchlib_urlfieldinverter_test_app
+    SOURCES
+    urlfieldinverter_test.cpp
+    DEPENDS
+    searchlib_test
+    searchlib
+)
+vespa_add_test(
+    NAME searchlib_urlfieldinverter_test_app
+    COMMAND searchlib_urlfieldinverter_test_app
+)
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/DESC b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC
new file mode 100644
index 00000000000..00115ada607
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC
@@ -0,0 +1 @@
+UrlField inverter test. Take a look at urlfieldinverter_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/FILES b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES
new file mode 100644
index 00000000000..ac08b0a3e90
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES
@@ -0,0 +1 @@
+urlfieldinverter_test.cpp
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp
new file mode 100644
index 00000000000..30b5883f153
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp
@@ -0,0 +1,579 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/* -*- mode: C++; coding: utf-8; -*- */
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("urlfieldinverter_test");
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/memoryindex/fieldinverter.h>
+#include <vespa/searchlib/memoryindex/urlfieldinverter.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+
+namespace search
+{
+
+
+using document::Document;
+using index::DocBuilder;
+using index::DocTypeBuilder;
+using index::Schema;
+
+namespace memoryindex
+{
+
+namespace {
+
+// Name of the document field whose value is fetched and inverted.
+const vespalib::string url("url");
+
+} // namespace
+
+
+namespace
+{
+
+/**
+ * Builds document "doc::10" with one value in the "url" index field.
+ *
+ * The URL http://www.yahoo.com:81/fluke?ab=2#4 is supplied both whole
+ * (sub-field "all") and split into scheme, host, port, path, query and
+ * fragment.  The path sub-field additionally gets the term annotation
+ * "altfluke".
+ */
+Document::UP
+makeDoc10Single(DocBuilder &b)
+{
+    b.startDocument("doc::10");
+    b.startIndexField("url")
+        .startSubField("all")
+            .addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4")
+        .endSubField()
+        .startSubField("scheme").addUrlTokenizedString("http").endSubField()
+        .startSubField("host").addUrlTokenizedString("www.yahoo.com").endSubField()
+        .startSubField("port").addUrlTokenizedString("81").endSubField()
+        .startSubField("path")
+            .addUrlTokenizedString("/fluke")
+            .addTermAnnotation("altfluke")
+        .endSubField()
+        .startSubField("query").addUrlTokenizedString("ab=2").endSubField()
+        .startSubField("fragment").addUrlTokenizedString("4").endSubField()
+        .endField();
+    return b.endDocument();
+}
+
+
+/**
+ * Builds document "doc::10" with two elements in the "url" index field,
+ * both opened with startElement(1).
+ *
+ * Element one is http://www.yahoo.com:82/fluke?ab=2#8 and its path carries
+ * the term annotation "altfluke"; element two is
+ * http://www.flickr.com:82/fluke?ab=2#9 and has no annotation.
+ */
+Document::UP
+makeDoc10Array(DocBuilder &b)
+{
+    b.startDocument("doc::10");
+    b.startIndexField("url")
+        // First element: yahoo URL, annotated path.
+        .startElement(1)
+            .startSubField("all")
+                .addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8")
+            .endSubField()
+            .startSubField("scheme").addUrlTokenizedString("http").endSubField()
+            .startSubField("host").addUrlTokenizedString("www.yahoo.com").endSubField()
+            .startSubField("port").addUrlTokenizedString("82").endSubField()
+            .startSubField("path")
+                .addUrlTokenizedString("/fluke")
+                .addTermAnnotation("altfluke")
+            .endSubField()
+            .startSubField("query").addUrlTokenizedString("ab=2").endSubField()
+            .startSubField("fragment").addUrlTokenizedString("8").endSubField()
+        .endElement()
+        // Second element: flickr URL, plain path.
+        .startElement(1)
+            .startSubField("all")
+                .addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9")
+            .endSubField()
+            .startSubField("scheme").addUrlTokenizedString("http").endSubField()
+            .startSubField("host").addUrlTokenizedString("www.flickr.com").endSubField()
+            .startSubField("port").addUrlTokenizedString("82").endSubField()
+            .startSubField("path").addUrlTokenizedString("/fluke").endSubField()
+            .startSubField("query").addUrlTokenizedString("ab=2").endSubField()
+            .startSubField("fragment").addUrlTokenizedString("9").endSubField()
+        .endElement()
+        .endField();
+    return b.endDocument();
+}
+
+/**
+ * Builds document "doc::10" with two elements in the "url" index field,
+ * opened with startElement(4) and startElement(7) respectively.
+ *
+ * Element one is http://www.yahoo.com:83/fluke?ab=2#12 and its path carries
+ * the term annotation "altfluke"; element two is
+ * http://www.flickr.com:85/fluke?ab=2#13 and has no annotation.
+ */
+Document::UP
+makeDoc10WeightedSet(DocBuilder &b)
+{
+    b.startDocument("doc::10");
+    b.startIndexField("url")
+        // First element: yahoo URL, annotated path.
+        .startElement(4)
+            .startSubField("all")
+                .addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12")
+            .endSubField()
+            .startSubField("scheme").addUrlTokenizedString("http").endSubField()
+            .startSubField("host").addUrlTokenizedString("www.yahoo.com").endSubField()
+            .startSubField("port").addUrlTokenizedString("83").endSubField()
+            .startSubField("path")
+                .addUrlTokenizedString("/fluke")
+                .addTermAnnotation("altfluke")
+            .endSubField()
+            .startSubField("query").addUrlTokenizedString("ab=2").endSubField()
+            .startSubField("fragment").addUrlTokenizedString("12").endSubField()
+        .endElement()
+        // Second element: flickr URL, plain path.
+        .startElement(7)
+            .startSubField("all")
+                .addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13")
+            .endSubField()
+            .startSubField("scheme").addUrlTokenizedString("http").endSubField()
+            .startSubField("host").addUrlTokenizedString("www.flickr.com").endSubField()
+            .startSubField("port").addUrlTokenizedString("85").endSubField()
+            .startSubField("path").addUrlTokenizedString("/fluke").endSubField()
+            .startSubField("query").addUrlTokenizedString("ab=2").endSubField()
+            .startSubField("fragment").addUrlTokenizedString("13").endSubField()
+        .endElement()
+        .endField();
+    return b.endDocument();
+}
+
+
+/** Builds document "doc::10" without populating any field. */
+Document::UP
+makeDoc10Empty(DocBuilder &b)
+{
+    b.startDocument("doc::10");
+    Document::UP emptyDoc = b.endDocument();
+    return emptyDoc;
+}
+
+}
+
+/**
+ * Test fixture: one FieldInverter per index field of the schema, all wired
+ * into a single UrlFieldInverter, with an OrderedDocumentInserter that
+ * receives whatever the field inverters push.
+ */
+struct Fixture
+{
+    Schema _schema;
+    DocBuilder _b;
+    std::vector<std::unique_ptr<FieldInverter> > _inverters;
+    std::unique_ptr<UrlFieldInverter> _urlInverter;
+    test::OrderedDocumentInserter _inserter;
+    DocTypeBuilder::SchemaIndexFields _schemaIndexFields;
+
+    /** Creates a schema holding the uri index fields for string field "url". */
+    static Schema
+    makeSchema(Schema::CollectionType collectionType)
+    {
+        Schema result;
+        result.addUriIndexFields(
+                Schema::IndexField("url", Schema::STRING, collectionType));
+        return result;
+    }
+
+    /**
+     * Sets up the schema, the document builder and the inverters for the
+     * given collection type of the "url" field.
+     */
+    Fixture(Schema::CollectionType collectionType)
+        : _schema(makeSchema(collectionType)),
+          _b(_schema),
+          _inverters(),
+          _urlInverter(),
+          _inserter(),
+          _schemaIndexFields()
+    {
+        _schemaIndexFields.setup(_schema);
+
+        // _inverters[i] is the inverter for index field id i.
+        const auto numFields = _schema.getNumIndexFields();
+        for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) {
+            _inverters.push_back(
+                    std::make_unique<FieldInverter>(_schema, fieldId));
+        }
+
+        // Hand the url inverter the per-component field inverters of the
+        // first uri field found in the schema.
+        const DocTypeBuilder::UriField &uri =
+            _schemaIndexFields._uriFields.front();
+        auto inverterAt = [this](auto fieldId) {
+            return _inverters[fieldId].get();
+        };
+        _urlInverter = std::make_unique<UrlFieldInverter>(
+                collectionType,
+                inverterAt(uri._all),
+                inverterAt(uri._scheme),
+                inverterAt(uri._host),
+                inverterAt(uri._port),
+                inverterAt(uri._path),
+                inverterAt(uri._query),
+                inverterAt(uri._fragment),
+                inverterAt(uri._hostname));
+    }
+
+    /** Feeds the "url" field value of doc to the url inverter as docId. */
+    void
+    invertDocument(uint32_t docId, const Document &doc)
+    {
+        _urlInverter->invertField(docId, doc.getValue(url));
+    }
+
+    /**
+     * Pushes every field inverter's content to the inserter, in field id
+     * order, after telling the inserter which field id is coming.
+     */
+    void
+    pushDocuments()
+    {
+        for (uint32_t fieldId = 0; fieldId < _inverters.size(); ++fieldId) {
+            _inserter.setFieldId(fieldId);
+            _inverters[fieldId]->pushDocuments(_inserter);
+        }
+    }
+
+    /** Turns on annotation use in the url inverter. */
+    void
+    enableAnnotations()
+    {
+        _urlInverter->setUseAnnotations(true);
+    }
+};
+
+
+// Single-valued url field, annotations not enabled: the "altfluke" term
+// annotation on the path must not appear in the output.
+// NOTE(review): field ids 0..7 look like all, scheme, host, port, path,
+// query, fragment, hostname (inferred from the expected terms) - confirm.
+TEST_F("requireThatSingleUrlFieldWorks", Fixture(Schema::SINGLE))
+{
+    const Document::UP doc = makeDoc10Single(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    // One line group per "f=<field id>" section.
+    EXPECT_EQUAL("f=0,"
+                 "w=2,a=10," "w=4,a=10," "w=81,a=10," "w=ab,a=10," "w=com,a=10,"
+                 "w=fluke,a=10," "w=http,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=1," "w=http,a=10,"
+                 "f=2," "w=com,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=3," "w=81,a=10,"
+                 "f=4," "w=fluke,a=10,"
+                 "f=5," "w=2,a=10," "w=ab,a=10,"
+                 "f=6," "w=4,a=10,"
+                 "f=7," "w=EnDhOsT,a=10," "w=StArThOsT,a=10,"
+                 "w=com,a=10," "w=www,a=10," "w=yahoo,a=10",
+                 f._inserter.toStr());
+}
+
+
+// Array url field with two elements, annotations not enabled: terms from
+// both URLs are expected, and "altfluke" must not appear.
+// NOTE(review): field ids 0..7 look like all, scheme, host, port, path,
+// query, fragment, hostname (inferred from the expected terms) - confirm.
+TEST_F("requireThatArrayUrlFieldWorks", Fixture(Schema::ARRAY))
+{
+    const Document::UP doc = makeDoc10Array(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    // One line group per "f=<field id>" section.
+    EXPECT_EQUAL("f=0,"
+                 "w=2,a=10," "w=8,a=10," "w=82,a=10," "w=9,a=10," "w=ab,a=10,"
+                 "w=com,a=10," "w=flickr,a=10," "w=fluke,a=10," "w=http,a=10,"
+                 "w=www,a=10," "w=yahoo,a=10,"
+                 "f=1," "w=http,a=10,"
+                 "f=2," "w=com,a=10," "w=flickr,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=3," "w=82,a=10,"
+                 "f=4," "w=fluke,a=10,"
+                 "f=5," "w=2,a=10," "w=ab,a=10,"
+                 "f=6," "w=8,a=10," "w=9,a=10,"
+                 "f=7," "w=EnDhOsT,a=10," "w=StArThOsT,a=10,"
+                 "w=com,a=10," "w=flickr,a=10," "w=www,a=10," "w=yahoo,a=10",
+                 f._inserter.toStr());
+}
+
+// Weighted set url field with two elements, annotations not enabled: terms
+// from both URLs are expected, and "altfluke" must not appear.
+// NOTE(review): field ids 0..7 look like all, scheme, host, port, path,
+// query, fragment, hostname (inferred from the expected terms) - confirm.
+TEST_F("requireThatWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET))
+{
+    const Document::UP doc = makeDoc10WeightedSet(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    // One line group per "f=<field id>" section.
+    EXPECT_EQUAL("f=0,"
+                 "w=12,a=10," "w=13,a=10," "w=2,a=10," "w=83,a=10," "w=85,a=10,"
+                 "w=ab,a=10," "w=com,a=10," "w=flickr,a=10," "w=fluke,a=10,"
+                 "w=http,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=1," "w=http,a=10,"
+                 "f=2," "w=com,a=10," "w=flickr,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=3," "w=83,a=10," "w=85,a=10,"
+                 "f=4," "w=fluke,a=10,"
+                 "f=5," "w=2,a=10," "w=ab,a=10,"
+                 "f=6," "w=12,a=10," "w=13,a=10,"
+                 "f=7," "w=EnDhOsT,a=10," "w=StArThOsT,a=10,"
+                 "w=com,a=10," "w=flickr,a=10," "w=www,a=10," "w=yahoo,a=10",
+                 f._inserter.toStr());
+}
+
+// Single-valued url field with annotations enabled: compared with the
+// non-annotated case, field 4 additionally contains "altfluke".
+// NOTE(review): field ids 0..7 look like all, scheme, host, port, path,
+// query, fragment, hostname (inferred from the expected terms) - confirm.
+TEST_F("requireThatAnnotatedSingleUrlFieldWorks", Fixture(Schema::SINGLE))
+{
+    f.enableAnnotations();
+    const Document::UP doc = makeDoc10Single(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    // One line group per "f=<field id>" section.
+    EXPECT_EQUAL("f=0,"
+                 "w=2,a=10," "w=4,a=10," "w=81,a=10," "w=ab,a=10," "w=com,a=10,"
+                 "w=fluke,a=10," "w=http,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=1," "w=http,a=10,"
+                 "f=2," "w=com,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=3," "w=81,a=10,"
+                 "f=4," "w=altfluke,a=10," "w=fluke,a=10,"
+                 "f=5," "w=2,a=10," "w=ab,a=10,"
+                 "f=6," "w=4,a=10,"
+                 "f=7," "w=EnDhOsT,a=10," "w=StArThOsT,a=10,"
+                 "w=com,a=10," "w=www,a=10," "w=yahoo,a=10",
+                 f._inserter.toStr());
+}
+
+
+// Array url field with annotations enabled: compared with the non-annotated
+// case, field 4 additionally contains "altfluke".
+// NOTE(review): field ids 0..7 look like all, scheme, host, port, path,
+// query, fragment, hostname (inferred from the expected terms) - confirm.
+TEST_F("requireThatAnnotatedArrayUrlFieldWorks", Fixture(Schema::ARRAY))
+{
+    f.enableAnnotations();
+    const Document::UP doc = makeDoc10Array(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    // One line group per "f=<field id>" section.
+    EXPECT_EQUAL("f=0,"
+                 "w=2,a=10," "w=8,a=10," "w=82,a=10," "w=9,a=10," "w=ab,a=10,"
+                 "w=com,a=10," "w=flickr,a=10," "w=fluke,a=10," "w=http,a=10,"
+                 "w=www,a=10," "w=yahoo,a=10,"
+                 "f=1," "w=http,a=10,"
+                 "f=2," "w=com,a=10," "w=flickr,a=10," "w=www,a=10," "w=yahoo,a=10,"
+                 "f=3," "w=82,a=10,"
+                 "f=4," "w=altfluke,a=10," "w=fluke,a=10,"
+                 "f=5," "w=2,a=10," "w=ab,a=10,"
+                 "f=6," "w=8,a=10," "w=9,a=10,"
+                 "f=7," "w=EnDhOsT,a=10," "w=StArThOsT,a=10,"
+                 "w=com,a=10," "w=flickr,a=10," "w=www,a=10," "w=yahoo,a=10",
+                 f._inserter.toStr());
+}
+
+// Weighted set url field with annotations enabled and the inserter in
+// verbose mode, so each posting carries a parenthesised detail list.
+// The w=4 / w=7 values match the startElement(4) / startElement(7) calls
+// that built the document.
+// NOTE(review): e looks like the element index, l the element length and
+// [n] the word position - confirm against OrderedDocumentInserter.
+TEST_F("requireThatAnnotatedWeightedSetFieldWorks",
+       Fixture(Schema::WEIGHTEDSET))
+{
+    f.enableAnnotations();
+    f._inserter.setVerbose();
+    const Document::UP doc = makeDoc10WeightedSet(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    EXPECT_EQUAL(// ---- field 0 ----
+                 "f=0,"
+                 "w=12,a=10(e=0,w=4,l=9[8]),"
+                 "w=13,a=10(e=1,w=7,l=9[8]),"
+                 "w=2,a=10(e=0,w=4,l=9[7],e=1,w=7,l=9[7]),"
+                 "w=83,a=10(e=0,w=4,l=9[4]),"
+                 "w=85,a=10(e=1,w=7,l=9[4]),"
+                 "w=ab,a=10(e=0,w=4,l=9[6],e=1,w=7,l=9[6]),"
+                 "w=com,a=10(e=0,w=4,l=9[3],e=1,w=7,l=9[3]),"
+                 "w=flickr,a=10(e=1,w=7,l=9[2]),"
+                 "w=fluke,a=10(e=0,w=4,l=9[5],e=1,w=7,l=9[5]),"
+                 "w=http,a=10(e=0,w=4,l=9[0],e=1,w=7,l=9[0]),"
+                 "w=www,a=10(e=0,w=4,l=9[1],e=1,w=7,l=9[1]),"
+                 "w=yahoo,a=10(e=0,w=4,l=9[2]),"
+                 // ---- field 1 ----
+                 "f=1,"
+                 "w=http,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
+                 // ---- field 2 ----
+                 "f=2,"
+                 "w=com,a=10(e=0,w=4,l=3[2],e=1,w=7,l=3[2]),"
+                 "w=flickr,a=10(e=1,w=7,l=3[1]),"
+                 "w=www,a=10(e=0,w=4,l=3[0],e=1,w=7,l=3[0]),"
+                 "w=yahoo,a=10(e=0,w=4,l=3[1]),"
+                 // ---- field 3 ----
+                 "f=3,"
+                 "w=83,a=10(e=0,w=4,l=1[0]),"
+                 "w=85,a=10(e=1,w=7,l=1[0]),"
+                 // ---- field 4 (only element 0 carries "altfluke") ----
+                 "f=4,"
+                 "w=altfluke,a=10(e=0,w=4,l=1[0]),"
+                 "w=fluke,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
+                 // ---- field 5 ----
+                 "f=5,"
+                 "w=2,a=10(e=0,w=4,l=2[1],e=1,w=7,l=2[1]),"
+                 "w=ab,a=10(e=0,w=4,l=2[0],e=1,w=7,l=2[0]),"
+                 // ---- field 6 ----
+                 "f=6,"
+                 "w=12,a=10(e=0,w=4,l=1[0]),"
+                 "w=13,a=10(e=1,w=7,l=1[0]),"
+                 // ---- field 7 ----
+                 "f=7,"
+                 "w=EnDhOsT,a=10(e=0,w=4,l=5[4],e=1,w=7,l=5[4]),"
+                 "w=StArThOsT,a=10(e=0,w=4,l=5[0],e=1,w=7,l=5[0]),"
+                 "w=com,a=10(e=0,w=4,l=5[3],e=1,w=7,l=5[3]),"
+                 "w=flickr,a=10(e=1,w=7,l=5[2]),"
+                 "w=www,a=10(e=0,w=4,l=5[1],e=1,w=7,l=5[1]),"
+                 "w=yahoo,a=10(e=0,w=4,l=5[2])",
+                 f._inserter.toStr());
+}
+
+
+// A document without a url value must leave the inserter output empty
+// (single-valued field).
+TEST_F("requireThatEmptySingleFieldWorks", Fixture(Schema::SINGLE))
+{
+    const Document::UP doc = makeDoc10Empty(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    EXPECT_EQUAL("", f._inserter.toStr());
+}
+
+// A document without a url value must leave the inserter output empty
+// (array field).
+TEST_F("requireThatEmptyArrayFieldWorks", Fixture(Schema::ARRAY))
+{
+    const Document::UP doc = makeDoc10Empty(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    EXPECT_EQUAL("", f._inserter.toStr());
+}
+
+// A document without a url value must leave the inserter output empty
+// (weighted set field).
+TEST_F("requireThatEmptyWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET))
+{
+    const Document::UP doc = makeDoc10Empty(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    EXPECT_EQUAL("", f._inserter.toStr());
+}
+
+// With annotations enabled, a document without a url value must still leave
+// the inserter output empty (single-valued field).
+TEST_F("requireThatAnnotatedEmptySingleFieldWorks", Fixture(Schema::SINGLE))
+{
+    f.enableAnnotations();
+    const Document::UP doc = makeDoc10Empty(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    EXPECT_EQUAL("", f._inserter.toStr());
+}
+
+// With annotations enabled, a document without a url value must still leave
+// the inserter output empty (array field).
+TEST_F("requireThatAnnotatedEmptyArrayFieldWorks", Fixture(Schema::ARRAY))
+{
+    f.enableAnnotations();
+    const Document::UP doc = makeDoc10Empty(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    EXPECT_EQUAL("", f._inserter.toStr());
+}
+
+// With annotations enabled, a document without a url value must still leave
+// the inserter output empty (weighted set field).
+TEST_F("requireThatAnnotatedEmptyWeightedSetFieldWorks",
+       Fixture(Schema::WEIGHTEDSET))
+{
+    f.enableAnnotations();
+    const Document::UP doc = makeDoc10Empty(f._b);
+    f.invertDocument(10, *doc);
+    f.pushDocuments();
+    EXPECT_EQUAL("", f._inserter.toStr());
+}
+
+} // namespace memoryindex
+} // namespace search
+
+// Test program entry point: runs every test registered in this file.
+TEST_MAIN()
+{
+    TEST_RUN_ALL();
+}