summary refs log tree commit diff stats
path: root/searchlib/src/tests/index
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/index')
-rw-r--r-- searchlib/src/tests/index/docbuilder/.gitignore 5
-rw-r--r-- searchlib/src/tests/index/docbuilder/CMakeLists.txt 8
-rw-r--r-- searchlib/src/tests/index/docbuilder/docbuilder_test.cpp 437
-rw-r--r-- searchlib/src/tests/index/doctypebuilder/.gitignore 5
-rw-r--r-- searchlib/src/tests/index/doctypebuilder/CMakeLists.txt 8
-rw-r--r-- searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp 74
-rw-r--r-- searchlib/src/tests/index/string_field_builder/CMakeLists.txt 9
-rw-r--r-- searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp 141
8 files changed, 150 insertions, 537 deletions
diff --git a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore
deleted file mode 100644
index 999644fce87..00000000000
--- a/searchlib/src/tests/index/docbuilder/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*_test
-.depend
-Makefile
-docbuilder_test
-searchlib_docbuilder_test_app
diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt
deleted file mode 100644
index 7a969f602ea..00000000000
--- a/searchlib/src/tests/index/docbuilder/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_docbuilder_test_app TEST
- SOURCES
- docbuilder_test.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app)
diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
deleted file mode 100644
index f76b61dcb78..00000000000
--- a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
+++ /dev/null
@@ -1,437 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/log/log.h>
-LOG_SETUP("docbuilder_test");
-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <vespa/searchlib/index/docbuilder.h>
-#include <vespa/vespalib/encoding/base64.h>
-#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <iostream>
-
-using namespace document;
-using search::index::schema::CollectionType;
-
-namespace search::index {
-
-namespace
-{
-std::string empty;
-}
-
-namespace linguistics
-{
-const vespalib::string SPANTREE_NAME("linguistics");
-}
-
-
-TEST("test docBuilder")
-{
- Schema s;
- s.addIndexField(Schema::IndexField("ia", schema::DataType::STRING));
- s.addIndexField(Schema::IndexField("ib", schema::DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("ic", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addUriIndexFields(Schema::IndexField("iu", schema::DataType::STRING));
- s.addUriIndexFields(Schema::IndexField("iau", schema::DataType::STRING, CollectionType::ARRAY));
- s.addUriIndexFields(Schema::IndexField("iwu", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("aa", schema::DataType::INT32));
- s.addAttributeField(Schema::AttributeField("ab", schema::DataType::FLOAT));
- s.addAttributeField(Schema::AttributeField("ac", schema::DataType::STRING));
- s.addAttributeField(Schema::AttributeField("ad", schema::DataType::INT32, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("ae", schema::DataType::FLOAT, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("af", schema::DataType::STRING, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("ag", schema::DataType::INT32, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("ah", schema::DataType::FLOAT, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("ai", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("asp1", schema::DataType::INT32));
- s.addAttributeField(Schema::AttributeField("asp2", schema::DataType::INT64));
- s.addAttributeField(Schema::AttributeField("aap1", schema::DataType::INT32, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("aap2", schema::DataType::INT64, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("awp1", schema::DataType::INT32, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("awp2", schema::DataType::INT64, CollectionType::WEIGHTEDSET));
-
- DocBuilder b(s);
- Document::UP doc;
- std::vector<std::string> lines;
- std::vector<std::string>::const_iterator itr;
- std::string xml;
-
- { // empty
- doc = b.startDocument("id:ns:searchdocument::0").endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::0\"/>", *itr++);
- EXPECT_EQUAL("", *itr++);
- EXPECT_TRUE(itr == lines.end());
- }
- { // all fields set
- std::vector<char> binaryBlob;
- binaryBlob.push_back('\0');
- binaryBlob.push_back('\2');
- binaryBlob.push_back('\1');
- std::string raw1s("Single Raw Element");
- std::string raw1a0("Array Raw Element 0");
- std::string raw1a1("Array Raw Element 1");
- std::string raw1w0("Weighted Set Raw Element 0");
- std::string raw1w1("Weighted Set Raw Element 1");
- raw1s += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1a0 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1a1 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1w0 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1w1 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- b.startDocument("id:ns:searchdocument::1");
- b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField();
- b.startIndexField("ib").startElement().addStr("foo").endElement().
- startElement(1).addStr("bar").addStr("baz").endElement().endField();
- b. startIndexField("ic").
- startElement(20).addStr("bar").addStr("baz").endElement().
- startElement().addStr("foo").endElement().
- endField();
- b.startIndexField("iu").
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("81").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("4").
- endSubField().
- endField();
- b.startIndexField("iau").
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("8").
- endSubField().
- endElement().
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("9").
- endSubField().
- endElement().
- endField();
- b.startIndexField("iwu").
- startElement(4).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("83").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("12").
- endSubField().
- endElement().
- startElement(7).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("85").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("13").
- endSubField().
- endElement().
- endField();
- b.startAttributeField("aa").addInt(2147483647).endField();
- b.startAttributeField("ab").addFloat(1234.56).endField();
- b.startAttributeField("ac").addStr("foo baz").endField();
- b.startAttributeField("ad").startElement().addInt(10).endElement().endField();
- b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField();
- b.startAttributeField("af").startElement().addStr("foo").endElement().endField();
- b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField();
- b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField();
- b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField();
- b.startAttributeField("asp1").addInt(1001).endField();
- b.startAttributeField("asp2").addPosition(1002, 1003).endField();
- b.startAttributeField("aap1").
- startElement().addInt(1004).endElement().
- startElement().addInt(1005).endElement().
- endField();
- b.startAttributeField("aap2").
- startElement().addPosition(1006, 1007).endElement().
- startElement().addPosition(1008, 1009).endElement().
- endField();
- b.startAttributeField("awp1").
- startElement(41).addInt(1010).endElement().
- startElement(42).addInt(1011).endElement().
- endField();
- b.startAttributeField("awp2").
- startElement(43).addPosition(1012, 1013).endElement().
- startElement(44).addPosition(1014, 1015).endElement().
- endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::1\">", *itr++);
- EXPECT_EQUAL("<iu>", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:81/fluke?ab=2#4</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>81</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>4</fragment>", *itr++);
- EXPECT_EQUAL("</iu>", *itr++);
- EXPECT_EQUAL("<aa>2147483647</aa>", *itr++);
- EXPECT_EQUAL("<aap2>", *itr++);
- EXPECT_EQUAL("<item>1047806</item>", *itr++);
- EXPECT_EQUAL("<item>1048322</item>", *itr++);
- EXPECT_EQUAL("</aap2>", *itr++);
- EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++);
- EXPECT_EQUAL("<ae>", *itr++);
- EXPECT_EQUAL("<item>10.5</item>", *itr++);
- EXPECT_EQUAL("</ae>", *itr++);
- EXPECT_EQUAL("<ib>", *itr++);
- EXPECT_EQUAL("<item>foo</item>", *itr++);
- EXPECT_EQUAL("<item>bar baz</item>", *itr++);
- EXPECT_EQUAL("</ib>", *itr++);
- EXPECT_EQUAL("<ah>", *itr++);
- EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++);
- EXPECT_EQUAL("</ah>", *itr++);
- EXPECT_EQUAL("<ic>", *itr++);
- EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++);
- EXPECT_EQUAL("</ic>", *itr++);
- EXPECT_EQUAL("<ac>foo baz</ac>", *itr++);
- EXPECT_EQUAL("<awp2>", *itr++);
- EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++);
- EXPECT_EQUAL("</awp2>", *itr++);
- EXPECT_EQUAL("<iau>", *itr++);
- EXPECT_EQUAL("<item>", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:82/fluke?ab=2#8</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>82</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>8</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("<item>", *itr++);
- EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++);
- EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>82</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>9</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("</iau>", *itr++);
- EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++);
- EXPECT_EQUAL("<ai>", *itr++);
- EXPECT_EQUAL("<item weight=\"4\">bar</item>", *itr++);
- EXPECT_EQUAL("</ai>", *itr++);
- EXPECT_EQUAL("<asp1>1001</asp1>", *itr++);
- EXPECT_EQUAL("<ad>", *itr++);
- EXPECT_EQUAL("<item>10</item>", *itr++);
- EXPECT_EQUAL("</ad>", *itr++);
- EXPECT_EQUAL("<iwu>", *itr++);
- EXPECT_EQUAL("<item weight=\"4\">", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:83/fluke?ab=2#12</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>83</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>12</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"7\">", *itr++);
- EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++);
- EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>85</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>13</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("</iwu>", *itr++);
- EXPECT_EQUAL("<ab>1234.56</ab>", *itr++);
- EXPECT_EQUAL("<ag>", *itr++);
- EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++);
- EXPECT_EQUAL("</ag>", *itr++);
- EXPECT_EQUAL("<awp1>", *itr++);
- EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++);
- EXPECT_EQUAL("</awp1>", *itr++);
- EXPECT_EQUAL("<aap1>", *itr++);
- EXPECT_EQUAL("<item>1004</item>", *itr++);
- EXPECT_EQUAL("<item>1005</item>", *itr++);
- EXPECT_EQUAL("</aap1>", *itr++);
- EXPECT_EQUAL("<af>", *itr++);
- EXPECT_EQUAL("<item>foo</item>", *itr++);
- EXPECT_EQUAL("</af>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
-#if 0
- std::cout << "onedoc xml start -----" << std::endl <<
- xml << std::endl <<
- "-------" << std::endl;
- std::cout << "onedoc toString start ----" << std::endl <<
- doc->toString(true) << std::endl <<
- "-------" << std::endl;
-#endif
- }
- { // create one more to see that everything is cleared
- b.startDocument("id:ns:searchdocument::2");
- b.startIndexField("ia").addStr("yes").endField();
- b.startAttributeField("aa").addInt(20).endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::2\">", *itr++);
- EXPECT_EQUAL("<aa>20</aa>", *itr++);
- EXPECT_EQUAL("<ia>yes</ia>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
- }
- { // create field with cjk chars
- b.startDocument("id:ns:searchdocument::3");
- b.startIndexField("ia").
- addStr("我就是那个").
- setAutoSpace(false).
- addStr("大灰狼").
- setAutoSpace(true).
- endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::3\">", *itr++);
- EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
- const FieldValue::UP iaval = doc->getValue("ia");
- ASSERT_TRUE(iaval.get() != NULL);
- const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *>
- (iaval.get());
- ASSERT_TRUE(iasval != NULL);
- StringFieldValue::SpanTrees trees = iasval->getSpanTrees();
- const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME);
- ASSERT_TRUE(tree != NULL);
- std::vector<Span> spans;
- std::vector<Span> expSpans;
- for (SpanTree::const_iterator i = tree->begin(), ie = tree->end();
- i != ie; ++i) {
- Annotation &ann = const_cast<Annotation &>(*i);
- const Span *span = dynamic_cast<const Span *>(ann.getSpanNode());
- if (span == NULL)
- continue;
- spans.push_back(*span);
- }
- expSpans.push_back(Span(0, 15));
- expSpans.push_back(Span(0, 15));
- expSpans.push_back(Span(15, 9));
- expSpans.push_back(Span(15, 9));
- ASSERT_TRUE(expSpans == spans);
-#if 0
- std::cout << "onedoc xml start -----" << std::endl <<
- xml << std::endl <<
- "-------" << std::endl;
- std::cout << "onedoc toString start ----" << std::endl <<
- doc->toString(true) << std::endl <<
- "-------" << std::endl;
-#endif
- }
-}
-
-TEST("test if index names are valid uri parts") {
- EXPECT_FALSE(UriField::mightBePartofUri("all"));
- EXPECT_FALSE(UriField::mightBePartofUri("fragment"));
- EXPECT_FALSE(UriField::mightBePartofUri(".all"));
- EXPECT_FALSE(UriField::mightBePartofUri("all.b"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.all"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.scheme"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.host"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.port"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.hostname"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.path"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.query"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.fragment"));
-}
-
-}
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore
deleted file mode 100644
index f15be1efcfe..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*_test
-.depend
-Makefile
-doctypebuilder_test
-searchlib_doctypebuilder_test_app
diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
deleted file mode 100644
index 348ecde5a7c..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_doctypebuilder_test_app TEST
- SOURCES
- doctypebuilder_test.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app)
diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
deleted file mode 100644
index 95854fa11b2..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/document/repo/documenttyperepo.h>
-#include <vespa/searchlib/index/doctypebuilder.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/vespalib/testkit/testapp.h>
-
-using namespace document;
-
-namespace search {
-namespace index {
-
-using schema::CollectionType;
-using schema::DataType;
-
-TEST("testSearchDocType") {
- Schema s;
- s.addIndexField(Schema::IndexField("ia", DataType::STRING));
- s.addIndexField(Schema::IndexField("ib", DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("ic", DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addUriIndexFields(Schema::IndexField("iu", DataType::STRING));
- s.addUriIndexFields(Schema::IndexField("iau", DataType::STRING, CollectionType::ARRAY));
- s.addUriIndexFields(Schema::IndexField("iwu", DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("aa", DataType::INT32));
- s.addAttributeField(Schema::AttributeField("spos", DataType::INT64));
- s.addAttributeField(Schema::AttributeField("apos", DataType::INT64, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("wpos", DataType::INT64, CollectionType::WEIGHTEDSET));
-
- DocTypeBuilder docTypeBuilder(s);
- document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig();
- DocumentTypeRepo repo(config);
- const DocumentType *docType = repo.getDocumentType("searchdocument");
- ASSERT_TRUE(docType);
- EXPECT_EQUAL(10u, docType->getFieldCount());
-
- EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName());
- EXPECT_EQUAL("Array<String>",
- docType->getField("ib").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<String>",
- docType->getField("ic").getDataType().getName());
- EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName());
- EXPECT_EQUAL("Array<url>",
- docType->getField("iau").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<url>",
- docType->getField("iwu").getDataType().getName());
-
- EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName());
- EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName());
- EXPECT_EQUAL("Array<Long>",
- docType->getField("apos").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<Long>",
- docType->getField("wpos").getDataType().getName());
-}
-
-TEST("require that multiple fields can have the same type") {
- Schema s;
- s.addIndexField(Schema::IndexField("array1", DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("array2", DataType::STRING, CollectionType::ARRAY));
- DocTypeBuilder docTypeBuilder(s);
- document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig();
- DocumentTypeRepo repo(config);
- const DocumentType *docType = repo.getDocumentType("searchdocument");
- ASSERT_TRUE(docType);
- EXPECT_EQUAL(2u, docType->getFieldCount());
-
- EXPECT_EQUAL("Array<String>",
- docType->getField("array1").getDataType().getName());
- EXPECT_EQUAL("Array<String>",
- docType->getField("array2").getDataType().getName());
-}
-
-} // namespace index
-} // namespace search
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/index/string_field_builder/CMakeLists.txt b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt
new file mode 100644
index 00000000000..f8774eae5ca
--- /dev/null
+++ b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_string_field_builder_test_app TEST
+ SOURCES
+ string_field_builder_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_string_field_builder_test_app COMMAND searchlib_string_field_builder_test_app)
diff --git a/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp
new file mode 100644
index 00000000000..8c2b641f724
--- /dev/null
+++ b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp
@@ -0,0 +1,141 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/index/string_field_builder.h>
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/datatype/annotationtype.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <cassert>
+#include <iostream>
+#include <optional>
+#include <utility>
+#include <vector>
+
+using document::Annotation;
+using document::AnnotationType;
+using document::Span;
+using document::SpanNode;
+using document::SpanTree;
+using document::StringFieldValue;
+using search::index::EmptyDocBuilder;
+using search::index::StringFieldBuilder;
+
+namespace
+{
+
+const vespalib::string SPANTREE_NAME("linguistics");
+
+/**
+ * Value type used by the tests to describe one annotation: a span given by
+ * its start and length, plus an optional label. Instances are compared with
+ * operator== and printed with operator<<.
+ */
+struct MyAnnotation {
+    int32_t start;
+    int32_t length;
+    std::optional<vespalib::string> label;
+
+    /** Annotation without a label. */
+    MyAnnotation(int32_t start_in, int32_t length_in) noexcept
+        : start(start_in),
+          length(length_in),
+          label()
+    {
+    }
+
+    /**
+     * Annotation with a label. label_in is taken by value, so it is moved
+     * into the member rather than copied a second time inside this
+     * noexcept constructor.
+     */
+    MyAnnotation(int32_t start_in, int32_t length_in, vespalib::string label_in) noexcept
+        : start(start_in),
+          length(length_in),
+          label(std::move(label_in))
+    {
+    }
+
+    bool operator==(const MyAnnotation& rhs) const noexcept;
+};
+
+// Two annotations are equal when start, length and label all match.
+bool
+MyAnnotation::operator==(const MyAnnotation& rhs) const noexcept
+{
+    if (start != rhs.start || length != rhs.length) {
+        return false;
+    }
+    return label == rhs.label;
+}
+
+
+// Prints an annotation as [start,length], followed by ("label") when a label is set.
+std::ostream& operator<<(std::ostream& os, const MyAnnotation& ann) {
+    os << "[" << ann.start << "," << ann.length << "]";
+    if (!ann.label) {
+        return os;
+    }
+    os << "(\"" << *ann.label << "\")";
+    return os;
+}
+
+}
+
+/**
+ * Fixture for the StringFieldBuilder tests. It owns the builder under test
+ * and provides helpers to extract annotations from a StringFieldValue and
+ * compare them with expectations.
+ */
+class StringFieldBuilderTest : public testing::Test
+{
+protected:
+    EmptyDocBuilder edb;
+    // Constructed from edb, so it must stay declared after edb.
+    StringFieldBuilder sfb;
+    StringFieldBuilderTest();
+    ~StringFieldBuilderTest() override;
+    /** Returns one MyAnnotation per Span annotation found in the "linguistics" span tree of val. */
+    std::vector<MyAnnotation> get_annotations(const StringFieldValue& val);
+    /** Expects that val has the annotations in exp and the string content plain. */
+    void assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val);
+};
+
+// Sets up the fixture: edb is default-constructed and sfb is then built from it.
+// The testing::Test base is default-initialized implicitly.
+StringFieldBuilderTest::StringFieldBuilderTest()
+    : edb(),
+      sfb(edb)
+{
+}
+
+// Defined out of line; the members clean up after themselves.
+StringFieldBuilderTest::~StringFieldBuilderTest() = default;
+
+/**
+ * Extracts the annotations of val from its "linguistics" span tree.
+ *
+ * Each annotation whose span node is a Span yields one MyAnnotation built
+ * from the span's from() and length(). If the annotation carries a field
+ * value, that value is read as a StringFieldValue and becomes the label.
+ * Annotations whose span node is not a Span are skipped.
+ *
+ * @param val string field value to inspect
+ * @return the collected annotations in span tree iteration order; empty when
+ *         val has no "linguistics" span tree
+ */
+std::vector<MyAnnotation>
+StringFieldBuilderTest::get_annotations(const StringFieldValue& val)
+{
+    std::vector<MyAnnotation> result;
+    StringFieldValue::SpanTrees trees = val.getSpanTrees();
+    const auto* tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
+    if (tree == nullptr) {
+        return result;
+    }
+    for (const auto& ann : *tree) {
+        // Use a gtest expectation instead of assert(): it is still checked
+        // when NDEBUG is defined, and a mismatch is reported as a test
+        // failure rather than aborting the whole test binary.
+        EXPECT_TRUE(ann.getType() == *AnnotationType::TERM);
+        const auto* span = dynamic_cast<const Span *>(ann.getSpanNode());
+        if (span == nullptr) {
+            continue;
+        }
+        auto ann_fv = ann.getFieldValue();
+        if (ann_fv == nullptr) {
+            result.emplace_back(span->from(), span->length());
+        } else {
+            // The reference cast throws std::bad_cast if the field value is not a StringFieldValue.
+            result.emplace_back(span->from(), span->length(), dynamic_cast<const StringFieldValue &>(*ann_fv).getValue());
+        }
+    }
+    return result;
+}
+
+void // Checks both the annotations and the plain text of val; uses non-fatal EXPECT_EQ, so both checks always run.
+StringFieldBuilderTest::assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val)
+{
+ EXPECT_EQ(exp, get_annotations(val)); // annotations extracted from val must equal exp
+ EXPECT_EQ(plain, val.getValue()); // string content of val must equal plain
+}
+
+// A StringFieldValue created directly from text is expected to yield no annotations.
+TEST_F(StringFieldBuilderTest, no_annotations)
+{
+    const StringFieldValue plain_value("foo");
+    assert_annotations({}, "foo", plain_value);
+}
+
+// A single word() call is expected to produce one annotation covering the whole word.
+TEST_F(StringFieldBuilderTest, single_word)
+{
+    const auto& built = sfb.word("word").build();
+    assert_annotations({{0, 4}}, "word", built);
+}
+
+// tokenize() is expected to annotate each space-separated word of the input.
+TEST_F(StringFieldBuilderTest, tokenize)
+{
+    const std::vector<MyAnnotation> expected{{0, 4}, {5, 2}, {8, 1}, {10, 4}};
+    const auto& built = sfb.tokenize("this is a test").build();
+    assert_annotations(expected, "this is a test", built);
+}
+
+// alt_word() is expected to add a second, labelled annotation over the span of the preceding word.
+TEST_F(StringFieldBuilderTest, alt_word)
+{
+    const std::vector<MyAnnotation> expected{{0, 3}, {4, 3}, {4, 3, "baz"}};
+    const auto& built = sfb.word("foo").space().word("bar").alt_word("baz").build();
+    assert_annotations(expected, "foo bar", built);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS() // presumably expands to a main() that runs every registered test; macro is defined outside this file (likely vespalib/gtest/gtest.h) - confirm