diff options
Diffstat (limited to 'searchlib/src/tests/index')
8 files changed, 150 insertions, 537 deletions
diff --git a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore deleted file mode 100644 index 999644fce87..00000000000 --- a/searchlib/src/tests/index/docbuilder/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*_test -.depend -Makefile -docbuilder_test -searchlib_docbuilder_test_app diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt deleted file mode 100644 index 7a969f602ea..00000000000 --- a/searchlib/src/tests/index/docbuilder/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_docbuilder_test_app TEST - SOURCES - docbuilder_test.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app) diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp deleted file mode 100644 index f76b61dcb78..00000000000 --- a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/log/log.h> -LOG_SETUP("docbuilder_test"); -#include <boost/algorithm/string/classification.hpp> -#include <boost/algorithm/string/split.hpp> -#include <vespa/searchlib/index/docbuilder.h> -#include <vespa/vespalib/encoding/base64.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/document/repo/fixedtyperepo.h> -#include <iostream> - -using namespace document; -using search::index::schema::CollectionType; - -namespace search::index { - -namespace -{ -std::string empty; -} - -namespace linguistics -{ -const vespalib::string SPANTREE_NAME("linguistics"); -} - - -TEST("test docBuilder") -{ - Schema s; - s.addIndexField(Schema::IndexField("ia", schema::DataType::STRING)); - s.addIndexField(Schema::IndexField("ib", schema::DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("ic", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addUriIndexFields(Schema::IndexField("iu", schema::DataType::STRING)); - s.addUriIndexFields(Schema::IndexField("iau", schema::DataType::STRING, CollectionType::ARRAY)); - s.addUriIndexFields(Schema::IndexField("iwu", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("aa", schema::DataType::INT32)); - s.addAttributeField(Schema::AttributeField("ab", schema::DataType::FLOAT)); - s.addAttributeField(Schema::AttributeField("ac", schema::DataType::STRING)); - s.addAttributeField(Schema::AttributeField("ad", schema::DataType::INT32, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("ae", schema::DataType::FLOAT, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("af", schema::DataType::STRING, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("ag", schema::DataType::INT32, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("ah", schema::DataType::FLOAT, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("ai", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("asp1", schema::DataType::INT32)); - s.addAttributeField(Schema::AttributeField("asp2", schema::DataType::INT64)); - s.addAttributeField(Schema::AttributeField("aap1", schema::DataType::INT32, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("aap2", schema::DataType::INT64, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("awp1", schema::DataType::INT32, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("awp2", schema::DataType::INT64, CollectionType::WEIGHTEDSET)); - - DocBuilder b(s); - Document::UP doc; - std::vector<std::string> lines; - std::vector<std::string>::const_iterator itr; - std::string xml; - - { // empty - doc = b.startDocument("id:ns:searchdocument::0").endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::0\"/>", *itr++); - EXPECT_EQUAL("", *itr++); - EXPECT_TRUE(itr == lines.end()); - } - { // all fields set - std::vector<char> binaryBlob; - binaryBlob.push_back('\0'); - binaryBlob.push_back('\2'); - binaryBlob.push_back('\1'); - std::string raw1s("Single Raw Element"); - std::string raw1a0("Array Raw Element 0"); - std::string raw1a1("Array Raw Element 1"); - std::string raw1w0("Weighted Set Raw Element 0"); - std::string raw1w1("Weighted Set Raw Element 1"); - raw1s += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1a0 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1a1 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1w0 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1w1 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - b.startDocument("id:ns:searchdocument::1"); - b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField(); - b.startIndexField("ib").startElement().addStr("foo").endElement(). - startElement(1).addStr("bar").addStr("baz").endElement().endField(); - b. startIndexField("ic"). - startElement(20).addStr("bar").addStr("baz").endElement(). - startElement().addStr("foo").endElement(). - endField(); - b.startIndexField("iu"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - b.startIndexField("iau"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - b.startIndexField("iwu"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - b.startAttributeField("aa").addInt(2147483647).endField(); - b.startAttributeField("ab").addFloat(1234.56).endField(); - b.startAttributeField("ac").addStr("foo baz").endField(); - b.startAttributeField("ad").startElement().addInt(10).endElement().endField(); - b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField(); - b.startAttributeField("af").startElement().addStr("foo").endElement().endField(); - b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField(); - b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField(); - b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField(); - b.startAttributeField("asp1").addInt(1001).endField(); - b.startAttributeField("asp2").addPosition(1002, 1003).endField(); - b.startAttributeField("aap1"). - startElement().addInt(1004).endElement(). - startElement().addInt(1005).endElement(). - endField(); - b.startAttributeField("aap2"). - startElement().addPosition(1006, 1007).endElement(). - startElement().addPosition(1008, 1009).endElement(). - endField(); - b.startAttributeField("awp1"). - startElement(41).addInt(1010).endElement(). - startElement(42).addInt(1011).endElement(). - endField(); - b.startAttributeField("awp2"). - startElement(43).addPosition(1012, 1013).endElement(). - startElement(44).addPosition(1014, 1015).endElement(). - endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::1\">", *itr++); - EXPECT_EQUAL("<iu>", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:81/fluke?ab=2#4</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>81</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>4</fragment>", *itr++); - EXPECT_EQUAL("</iu>", *itr++); - EXPECT_EQUAL("<aa>2147483647</aa>", *itr++); - EXPECT_EQUAL("<aap2>", *itr++); - EXPECT_EQUAL("<item>1047806</item>", *itr++); - EXPECT_EQUAL("<item>1048322</item>", *itr++); - EXPECT_EQUAL("</aap2>", *itr++); - EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++); - EXPECT_EQUAL("<ae>", *itr++); - EXPECT_EQUAL("<item>10.5</item>", *itr++); - EXPECT_EQUAL("</ae>", *itr++); - EXPECT_EQUAL("<ib>", *itr++); - EXPECT_EQUAL("<item>foo</item>", *itr++); - EXPECT_EQUAL("<item>bar baz</item>", *itr++); - EXPECT_EQUAL("</ib>", *itr++); - EXPECT_EQUAL("<ah>", *itr++); - EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++); - EXPECT_EQUAL("</ah>", *itr++); - EXPECT_EQUAL("<ic>", *itr++); - EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++); - EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++); - EXPECT_EQUAL("</ic>", *itr++); - EXPECT_EQUAL("<ac>foo baz</ac>", *itr++); - EXPECT_EQUAL("<awp2>", *itr++); - EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++); - EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++); - EXPECT_EQUAL("</awp2>", *itr++); - EXPECT_EQUAL("<iau>", *itr++); - EXPECT_EQUAL("<item>", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:82/fluke?ab=2#8</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>82</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>8</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("<item>", *itr++); - EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++); - EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>82</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>9</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("</iau>", *itr++); - EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++); - EXPECT_EQUAL("<ai>", *itr++); - EXPECT_EQUAL("<item weight=\"4\">bar</item>", *itr++); - EXPECT_EQUAL("</ai>", *itr++); - EXPECT_EQUAL("<asp1>1001</asp1>", *itr++); - EXPECT_EQUAL("<ad>", *itr++); - EXPECT_EQUAL("<item>10</item>", *itr++); - EXPECT_EQUAL("</ad>", *itr++); - EXPECT_EQUAL("<iwu>", *itr++); - EXPECT_EQUAL("<item weight=\"4\">", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:83/fluke?ab=2#12</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>83</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>12</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("<item weight=\"7\">", *itr++); - EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++); - EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>85</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>13</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("</iwu>", *itr++); - EXPECT_EQUAL("<ab>1234.56</ab>", *itr++); - EXPECT_EQUAL("<ag>", *itr++); - EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++); - EXPECT_EQUAL("</ag>", *itr++); - EXPECT_EQUAL("<awp1>", *itr++); - EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++); - EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++); - EXPECT_EQUAL("</awp1>", *itr++); - EXPECT_EQUAL("<aap1>", *itr++); - EXPECT_EQUAL("<item>1004</item>", *itr++); - EXPECT_EQUAL("<item>1005</item>", *itr++); - EXPECT_EQUAL("</aap1>", *itr++); - EXPECT_EQUAL("<af>", *itr++); - EXPECT_EQUAL("<item>foo</item>", *itr++); - EXPECT_EQUAL("</af>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); -#if 0 - std::cout << "onedoc xml start -----" << std::endl << - xml << std::endl << - "-------" << std::endl; - std::cout << "onedoc toString start ----" << std::endl << - doc->toString(true) << std::endl << - "-------" << std::endl; -#endif - } - { // create one more to see that everything is cleared - b.startDocument("id:ns:searchdocument::2"); - b.startIndexField("ia").addStr("yes").endField(); - b.startAttributeField("aa").addInt(20).endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::2\">", *itr++); - EXPECT_EQUAL("<aa>20</aa>", *itr++); - EXPECT_EQUAL("<ia>yes</ia>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); - } - { // create field with cjk chars - b.startDocument("id:ns:searchdocument::3"); - b.startIndexField("ia"). - addStr("我就是那个"). - setAutoSpace(false). - addStr("大灰狼"). - setAutoSpace(true). - endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::3\">", *itr++); - EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); - const FieldValue::UP iaval = doc->getValue("ia"); - ASSERT_TRUE(iaval.get() != NULL); - const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *> - (iaval.get()); - ASSERT_TRUE(iasval != NULL); - StringFieldValue::SpanTrees trees = iasval->getSpanTrees(); - const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME); - ASSERT_TRUE(tree != NULL); - std::vector<Span> spans; - std::vector<Span> expSpans; - for (SpanTree::const_iterator i = tree->begin(), ie = tree->end(); - i != ie; ++i) { - Annotation &ann = const_cast<Annotation &>(*i); - const Span *span = dynamic_cast<const Span *>(ann.getSpanNode()); - if (span == NULL) - continue; - spans.push_back(*span); - } - expSpans.push_back(Span(0, 15)); - expSpans.push_back(Span(0, 15)); - expSpans.push_back(Span(15, 9)); - expSpans.push_back(Span(15, 9)); - ASSERT_TRUE(expSpans == spans); -#if 0 - std::cout << "onedoc xml start -----" << std::endl << - xml << std::endl << - "-------" << std::endl; - std::cout << "onedoc toString start ----" << std::endl << - doc->toString(true) << std::endl << - "-------" << std::endl; -#endif - } -} - -TEST("test if index names are valid uri parts") { - EXPECT_FALSE(UriField::mightBePartofUri("all")); - EXPECT_FALSE(UriField::mightBePartofUri("fragment")); - EXPECT_FALSE(UriField::mightBePartofUri(".all")); - EXPECT_FALSE(UriField::mightBePartofUri("all.b")); - EXPECT_TRUE(UriField::mightBePartofUri("b.all")); - EXPECT_TRUE(UriField::mightBePartofUri("b.scheme")); - EXPECT_TRUE(UriField::mightBePartofUri("b.host")); - EXPECT_TRUE(UriField::mightBePartofUri("b.port")); - EXPECT_TRUE(UriField::mightBePartofUri("b.hostname")); - EXPECT_TRUE(UriField::mightBePartofUri("b.path")); - EXPECT_TRUE(UriField::mightBePartofUri("b.query")); - EXPECT_TRUE(UriField::mightBePartofUri("b.fragment")); -} - -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore deleted file mode 100644 index f15be1efcfe..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*_test -.depend -Makefile -doctypebuilder_test -searchlib_doctypebuilder_test_app diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt deleted file mode 100644 index 348ecde5a7c..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_doctypebuilder_test_app TEST - SOURCES - doctypebuilder_test.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app) diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp deleted file mode 100644 index 95854fa11b2..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/searchlib/index/doctypebuilder.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/vespalib/testkit/testapp.h> - -using namespace document; - -namespace search { -namespace index { - -using schema::CollectionType; -using schema::DataType; - -TEST("testSearchDocType") { - Schema s; - s.addIndexField(Schema::IndexField("ia", DataType::STRING)); - s.addIndexField(Schema::IndexField("ib", DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("ic", DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addUriIndexFields(Schema::IndexField("iu", DataType::STRING)); - s.addUriIndexFields(Schema::IndexField("iau", DataType::STRING, CollectionType::ARRAY)); - s.addUriIndexFields(Schema::IndexField("iwu", DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("aa", DataType::INT32)); - s.addAttributeField(Schema::AttributeField("spos", DataType::INT64)); - s.addAttributeField(Schema::AttributeField("apos", DataType::INT64, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("wpos", DataType::INT64, CollectionType::WEIGHTEDSET)); - - DocTypeBuilder docTypeBuilder(s); - document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig(); - DocumentTypeRepo repo(config); - const DocumentType *docType = repo.getDocumentType("searchdocument"); - ASSERT_TRUE(docType); - EXPECT_EQUAL(10u, docType->getFieldCount()); - - EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName()); - EXPECT_EQUAL("Array<String>", - docType->getField("ib").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<String>", - docType->getField("ic").getDataType().getName()); - EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName()); - EXPECT_EQUAL("Array<url>", - docType->getField("iau").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<url>", - docType->getField("iwu").getDataType().getName()); - - EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName()); - EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName()); - EXPECT_EQUAL("Array<Long>", - docType->getField("apos").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<Long>", - docType->getField("wpos").getDataType().getName()); -} - -TEST("require that multiple fields can have the same type") { - Schema s; - s.addIndexField(Schema::IndexField("array1", DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("array2", DataType::STRING, CollectionType::ARRAY)); - DocTypeBuilder docTypeBuilder(s); - document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig(); - DocumentTypeRepo repo(config); - const DocumentType *docType = repo.getDocumentType("searchdocument"); - ASSERT_TRUE(docType); - EXPECT_EQUAL(2u, docType->getFieldCount()); - - EXPECT_EQUAL("Array<String>", - docType->getField("array1").getDataType().getName()); - EXPECT_EQUAL("Array<String>", - docType->getField("array2").getDataType().getName()); -} - -} // namespace index -} // namespace search - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/index/string_field_builder/CMakeLists.txt b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt new file mode 100644 index 00000000000..f8774eae5ca --- /dev/null +++ b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_string_field_builder_test_app TEST + SOURCES + string_field_builder_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_string_field_builder_test_app COMMAND searchlib_string_field_builder_test_app) diff --git a/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp new file mode 100644 index 00000000000..8c2b641f724 --- /dev/null +++ b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp @@ -0,0 +1,141 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/index/string_field_builder.h> +#include <vespa/document/annotation/annotation.h> +#include <vespa/document/annotation/span.h> +#include <vespa/document/annotation/spanlist.h> +#include <vespa/document/annotation/spantree.h> +#include <vespa/document/datatype/annotationtype.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/index/empty_doc_builder.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <cassert> +#include <iostream> + +using document::Annotation; +using document::AnnotationType; +using document::Span; +using document::SpanNode; +using document::SpanTree; +using document::StringFieldValue; +using search::index::EmptyDocBuilder; +using search::index::StringFieldBuilder; + +namespace +{ + +const vespalib::string SPANTREE_NAME("linguistics"); + +struct MyAnnotation { + int32_t start; + int32_t length; + std::optional<vespalib::string> label; + + MyAnnotation(int32_t start_in, int32_t length_in) noexcept + : start(start_in), + length(length_in), + label() + { + } + + MyAnnotation(int32_t start_in, int32_t length_in, vespalib::string label_in) noexcept + : start(start_in), + length(length_in), + label(label_in) + { + } + + bool operator==(const MyAnnotation& rhs) const noexcept; +}; + +bool +MyAnnotation::operator==(const MyAnnotation& rhs) const noexcept +{ + return start == rhs.start && + length == rhs.length && + label == rhs.label; +} + + +std::ostream& operator<<(std::ostream& os, const MyAnnotation& ann) { + os << "[" << ann.start << "," << ann.length << "]"; + if (ann.label.has_value()) { + os << "(\"" << ann.label.value() << "\")"; + } + return os; +} + +} + +class StringFieldBuilderTest : public testing::Test +{ +protected: + EmptyDocBuilder edb; + StringFieldBuilder sfb; + StringFieldBuilderTest(); + ~StringFieldBuilderTest(); + std::vector<MyAnnotation> get_annotations(const StringFieldValue& val); + void assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val); +}; + +StringFieldBuilderTest::StringFieldBuilderTest() + : testing::Test(), + edb(), + sfb(edb) +{ +} + +StringFieldBuilderTest::~StringFieldBuilderTest() = default; + +std::vector<MyAnnotation> +StringFieldBuilderTest::get_annotations(const StringFieldValue& val) +{ + std::vector<MyAnnotation> result; + StringFieldValue::SpanTrees trees = val.getSpanTrees(); + const auto* tree = StringFieldValue::findTree(trees, SPANTREE_NAME); + if (tree != nullptr) { + for (auto& ann : *tree) { + assert(ann.getType() == *AnnotationType::TERM); + auto span = dynamic_cast<const Span *>(ann.getSpanNode()); + if (span == nullptr) { + continue; + } + auto ann_fv = ann.getFieldValue(); + if (ann_fv == nullptr) { + result.emplace_back(span->from(), span->length()); + } else { + result.emplace_back(span->from(), span->length(), dynamic_cast<const StringFieldValue &>(*ann_fv).getValue()); + } + } + } + return result; +} + +void +StringFieldBuilderTest::assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val) +{ + EXPECT_EQ(exp, get_annotations(val)); + EXPECT_EQ(plain, val.getValue()); +} + +TEST_F(StringFieldBuilderTest, no_annotations) +{ + assert_annotations({}, "foo", StringFieldValue("foo")); +} + +TEST_F(StringFieldBuilderTest, single_word) +{ + assert_annotations({{0, 4}}, "word", sfb.word("word").build()); +} + +TEST_F(StringFieldBuilderTest, tokenize) +{ + assert_annotations({{0, 4}, {5, 2}, {8, 1}, {10, 4}}, "this is a test", sfb.tokenize("this is a test").build()); +} + +TEST_F(StringFieldBuilderTest, alt_word) +{ + assert_annotations({{0, 3}, {4, 3}, {4, 3, "baz"}}, "foo bar", sfb.word("foo").space().word("bar").alt_word("baz").build()); +} + +GTEST_MAIN_RUN_ALL_TESTS() |