diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchcore/src/tests/proton/docsummary |
Publish
Diffstat (limited to 'searchcore/src/tests/proton/docsummary')
15 files changed, 2348 insertions, 0 deletions
diff --git a/searchcore/src/tests/proton/docsummary/.gitignore b/searchcore/src/tests/proton/docsummary/.gitignore new file mode 100644 index 00000000000..f5e934f84da --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +docsummary_test + +searchcore_docsummary_test_app +searchcore_summaryfieldconverter_test_app diff --git a/searchcore/src/tests/proton/docsummary/CMakeLists.txt b/searchcore/src/tests/proton/docsummary/CMakeLists.txt new file mode 100644 index 00000000000..dca65528840 --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchcore_docsummary_test_app + SOURCES + docsummary.cpp + DEPENDS + searchcore_server + searchcore_initializer + searchcore_reprocessing + searchcore_index + searchcore_persistenceengine + searchcore_feedoperation + searchcore_docsummary + searchcore_matchengine + searchcore_summaryengine + searchcore_matching + searchcore_attribute + searchcore_documentmetastore + searchcore_bucketdb + searchcore_flushengine + searchcore_pcommon + searchcore_grouping + searchcore_proton_metrics + searchcore_fconfig + searchcore_util +) +vespa_add_executable(searchcore_summaryfieldconverter_test_app + SOURCES + summaryfieldconverter_test.cpp + DEPENDS + searchcore_docsummary +) +vespa_add_test(NAME searchcore_docsummary_test_app COMMAND sh docsummary_test.sh) diff --git a/searchcore/src/tests/proton/docsummary/DESC b/searchcore/src/tests/proton/docsummary/DESC new file mode 100644 index 00000000000..ba16d5453b6 --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/DESC @@ -0,0 +1 @@ +docsummary test. Take a look at docsummary.cpp for details. diff --git a/searchcore/src/tests/proton/docsummary/FILES b/searchcore/src/tests/proton/docsummary/FILES new file mode 100644 index 00000000000..e63fca83f2e --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/FILES @@ -0,0 +1 @@ +docsummary.cpp diff --git a/searchcore/src/tests/proton/docsummary/attributes.cfg b/searchcore/src/tests/proton/docsummary/attributes.cfg new file mode 100644 index 00000000000..3866731b410 --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/attributes.cfg @@ -0,0 +1,45 @@ +attribute[16] +attribute[0].name "ba" +attribute[0].datatype INT32 +attribute[1].name "bb" +attribute[1].datatype FLOAT +attribute[2].name "bc" +attribute[2].datatype STRING +attribute[3].name "bd" +attribute[3].datatype INT32 +attribute[3].collectiontype ARRAY +attribute[4].name "be" +attribute[4].datatype FLOAT +attribute[4].collectiontype ARRAY +attribute[5].name "bf" +attribute[5].datatype STRING +attribute[5].collectiontype ARRAY +attribute[6].name "bg" +attribute[6].datatype INT32 +attribute[6].collectiontype WEIGHTEDSET +attribute[7].name "bh" +attribute[7].datatype FLOAT +attribute[7].collectiontype WEIGHTEDSET +attribute[8].name "bi" +attribute[8].datatype STRING +attribute[8].collectiontype WEIGHTEDSET +attribute[9].name "sp1" +attribute[9].datatype INT32 +attribute[10].name "sp2" +attribute[10].datatype INT64 +attribute[11].name "ap1" +attribute[11].datatype INT32 +attribute[11].collectiontype ARRAY +attribute[12].name "ap2" +attribute[12].datatype INT64 +attribute[12].collectiontype ARRAY +attribute[13].name "wp1" +attribute[13].datatype INT32 +attribute[13].collectiontype WEIGHTEDSET +attribute[14].name "wp2" +attribute[14].datatype INT64 +attribute[14].collectiontype WEIGHTEDSET +attribute[15].name "bj" +attribute[15].datatype TENSOR +attribute[15].tensortype "tensor(x{},y{})" +attribute[15].collectiontype SINGLE diff --git a/searchcore/src/tests/proton/docsummary/docsummary.cpp b/searchcore/src/tests/proton/docsummary/docsummary.cpp new file mode 100644 index 00000000000..80eaf56bcba --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/docsummary.cpp @@ -0,0 +1,1296 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("docsummary_test"); +#include <vespa/document/fieldvalue/document.h> +#include <vespa/searchcore/proton/attribute/attribute_writer.h> +#include <vespa/searchcore/proton/common/bucketfactory.h> +#include <vespa/searchcore/proton/docsummary/docsumcontext.h> +#include <vespa/searchcore/proton/docsummary/documentstoreadapter.h> +#include <vespa/searchcore/proton/docsummary/summarymanager.h> +#include <vespa/searchcore/proton/server/documentdb.h> +#include <vespa/searchcore/proton/server/memoryconfigstore.h> +#include <vespa/searchcore/proton/metrics/metricswireservice.h> +#include <vespa/searchcore/proton/server/summaryadapter.h> +#include <vespa/searchlib/common/idestructorcallback.h> +#include <vespa/searchlib/docstore/logdocumentstore.h> +#include <vespa/searchlib/engine/docsumapi.h> +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/transactionlog/translogserver.h> +#include <tests/proton/common/dummydbowner.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/transactionlog/nosyncproxy.h> +#include <vespa/vespalib/tensor/tensor_factory.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <vespa/searchlib/attribute/tensorattribute.h> + +using namespace document; +using namespace search; +using namespace search::docsummary; +using namespace search::engine; +using namespace search::index; +using namespace search::transactionlog; +using search::TuneFileDocumentDB; +using document::DocumenttypesConfig; +using storage::spi::Timestamp; +using search::index::DummyFileHeaderContext; +using vespa::config::search::core::ProtonConfig; +using vespalib::tensor::Tensor; +using vespalib::tensor::TensorCells; +using vespalib::tensor::TensorDimensions; +using vespalib::tensor::TensorFactory; + +typedef std::unique_ptr<GeneralResult> GeneralResultPtr; + +namespace proton { + +class DirMaker +{ +public: + DirMaker(const vespalib::string & dir) : + _dir(dir) + { + FastOS_File::MakeDirectory(dir.c_str()); + } + ~DirMaker() + { + FastOS_File::EmptyAndRemoveDirectory(_dir.c_str()); + } +private: + vespalib::string _dir; +}; + +class BuildContext +{ +public: + DirMaker _dmk; + DocBuilder _bld; + DocumentTypeRepo::SP _repo; + DummyFileHeaderContext _fileHeaderContext; + vespalib::ThreadStackExecutor _summaryExecutor; + search::transactionlog::NoSyncProxy _noTlSyncer; + search::LogDocumentStore _str; + uint64_t _serialNum; + + BuildContext(const Schema &schema) + : _dmk("summary"), + _bld(schema), + _repo(new DocumentTypeRepo(_bld.getDocumentType())), + _summaryExecutor(4, 128 * 1024), + _noTlSyncer(), + _str(_summaryExecutor, "summary", + LogDocumentStore::Config( + DocumentStore::Config(), + LogDataStore::Config()), + GrowStrategy(), + TuneFileSummary(), + _fileHeaderContext, + _noTlSyncer, + NULL), + _serialNum(1) + { + } + + ~BuildContext(void) + { + } + + void + endDocument(uint32_t docId) + { + Document::UP doc = _bld.endDocument(); + _str.write(_serialNum++, *doc, docId); + } + + FieldCacheRepo::UP createFieldCacheRepo(const ResultConfig &resConfig) const { + return FieldCacheRepo::UP(new FieldCacheRepo(resConfig, _bld.getDocumentType())); + } +}; + + +namespace { + +const char * +getDocTypeName(void) +{ + return "searchdocument"; +} + +Tensor::UP createTensor(const TensorCells &cells, + const TensorDimensions &dimensions) { + vespalib::tensor::DefaultTensor::builder builder; + return TensorFactory::create(cells, dimensions, builder); +} + +} // namespace + + +class DBContext : public DummyDBOwner +{ +public: + DirMaker _dmk; + DummyFileHeaderContext _fileHeaderContext; + TransLogServer _tls; + vespalib::ThreadStackExecutor _summaryExecutor; + bool _mkdirOk; + matching::QueryLimiter _queryLimiter; + vespalib::Clock _clock; + DummyWireService _dummy; + config::DirSpec _spec; + DocumentDBConfigHelper _configMgr; + DocumentDBConfig::DocumenttypesConfigSP _documenttypesConfig; + const DocumentTypeRepo::SP _repo; + TuneFileDocumentDB::SP _tuneFileDocumentDB; + std::unique_ptr<DocumentDB> _ddb; + AttributeWriter::UP _aw; + ISummaryAdapter::SP _sa; + + DBContext(const DocumentTypeRepo::SP &repo, const char *docTypeName) + : _dmk(docTypeName), + _fileHeaderContext(), + _tls("tmp", 9013, ".", _fileHeaderContext), + _summaryExecutor(8, 128*1024), + _mkdirOk(FastOS_File::MakeDirectory("tmpdb")), + _queryLimiter(), + _clock(), + _dummy(), + _spec("."), + _configMgr(_spec, getDocTypeName()), + _documenttypesConfig(new DocumenttypesConfig()), + _repo(repo), + _tuneFileDocumentDB(new TuneFileDocumentDB()), + _ddb(), + _aw(), + _sa() + { + assert(_mkdirOk); + BootstrapConfig::SP b(new BootstrapConfig(1, + _documenttypesConfig, + _repo, + BootstrapConfig::ProtonConfigSP(new ProtonConfig()), + _tuneFileDocumentDB)); + _configMgr.forwardConfig(b); + _configMgr.nextGeneration(0); + if (! FastOS_File::MakeDirectory((std::string("tmpdb/") + docTypeName).c_str())) { abort(); } + _ddb.reset(new DocumentDB("tmpdb", + _configMgr.getConfig(), + "tcp/localhost:9013", + _queryLimiter, + _clock, + DocTypeName(docTypeName), + ProtonConfig(), + *this, + _summaryExecutor, + _summaryExecutor, + NULL, + _dummy, + _fileHeaderContext, + ConfigStore::UP(new MemoryConfigStore), + std::make_shared<vespalib:: + ThreadStackExecutor> + (16, 128 * 1024))), + _ddb->start(); + _ddb->waitForOnlineState(); + _aw = AttributeWriter::UP(new AttributeWriter(_ddb-> + getReadySubDB()-> + getAttributeManager())); + _sa = _ddb->getReadySubDB()->getSummaryAdapter(); + } + ~DBContext() + { + _sa.reset(); + _aw.reset(); + _ddb.reset(); + FastOS_File::EmptyAndRemoveDirectory("tmp"); + FastOS_File::EmptyAndRemoveDirectory("tmpdb"); + } + + void + put(const document::Document &doc, const search::DocumentIdT lid) + { + const document::DocumentId &docId = doc.getId(); + typedef DocumentMetaStore::Result PutRes; + IDocumentMetaStore &dms = _ddb->getReadySubDB()->getDocumentMetaStoreContext().get(); + PutRes putRes(dms.put(docId.getGlobalId(), + BucketFactory::getBucketId(docId), + Timestamp(0u), + lid)); + LOG_ASSERT(putRes.ok()); + uint64_t serialNum = _ddb->getFeedHandler().incSerialNum(); + _aw->put(serialNum, doc, lid, true, std::shared_ptr<IDestructorCallback>()); + _ddb->getReadySubDB()-> + getAttributeManager()->getAttributeFieldWriter().sync(); + _sa->put(serialNum, doc, lid); + const GlobalId &gid = docId.getGlobalId(); + BucketId bucketId(gid.convertToBucketId()); + bucketId.setUsedBits(8); + storage::spi::Timestamp ts(0); + DbDocumentId dbdId(lid); + DbDocumentId prevDbdId(0); + document::Document::SP xdoc(new document::Document(doc)); + PutOperation op(bucketId, + ts, + xdoc, + serialNum, + dbdId, + prevDbdId); + _ddb->getFeedHandler().storeOperation(op); + SearchView *sv(dynamic_cast<SearchView *> + (_ddb->getReadySubDB()->getSearchView().get())); + if (sv != NULL) { + // cf. FeedView::putAttributes() + DocIdLimit &docIdLimit = sv->getDocIdLimit(); + if (docIdLimit.get() <= lid) + docIdLimit.set(lid + 1); + } + } +}; + +class Test : public vespalib::TestApp +{ +private: + std::unique_ptr<vespa::config::search::SummaryConfig> _summaryCfg; + ResultConfig _resultCfg; + std::set<vespalib::string> _markupFields; + + const vespa::config::search::SummaryConfig & + getSummaryConfig() const + { + return *_summaryCfg; + } + + const ResultConfig &getResultConfig() const + { + return _resultCfg; + } + + const std::set<vespalib::string> & + getMarkupFields(void) const + { + return _markupFields; + } + + GeneralResultPtr + getResult(DocumentStoreAdapter & dsa, uint32_t docId); + + GeneralResultPtr + getResult(const DocsumReply & reply, uint32_t id, uint32_t resultClassID); + + bool + assertString(const std::string & exp, + const std::string & fieldName, + DocumentStoreAdapter &dsa, + uint32_t id); + + bool + assertString(const std::string &exp, + const std::string &fieldName, + const DocsumReply &reply, + uint32_t id, + uint32_t resultClassID); + + bool + assertSlime(const std::string &exp, + const DocsumReply &reply, + uint32_t id); + + void + requireThatAdapterHandlesAllFieldTypes(); + + void + requireThatAdapterHandlesMultipleDocuments(); + + void + requireThatAdapterHandlesDocumentIdField(); + + void + requireThatDocsumRequestIsProcessed(); + + void + requireThatRewritersAreUsed(); + + void + requireThatAttributesAreUsed(); + + void + requireThatSummaryAdapterHandlesPutAndRemove(); + + void + requireThatAnnotationsAreUsed(); + + void + requireThatUrisAreUsed(); + + void + requireThatPositionsAreUsed(); + + void + requireThatRawFieldsWorks(); + + void + requireThatFieldCacheRepoCanReturnDefaultFieldCache(); + +public: + Test(); + int Main(); +}; + + +GeneralResultPtr +Test::getResult(DocumentStoreAdapter & dsa, uint32_t docId) +{ + DocsumStoreValue docsum = dsa.getMappedDocsum(docId, false); + ASSERT_TRUE(docsum.pt() != NULL); + GeneralResultPtr retval(new GeneralResult(dsa.getResultClass(), + 0, 0, 0)); + // skip the 4 byte class id + ASSERT_TRUE(retval->unpack(docsum.pt() + 4, + docsum.len() - 4) == 0); + return retval; +} + + +GeneralResultPtr +Test::getResult(const DocsumReply & reply, uint32_t id, uint32_t resultClassID) +{ + GeneralResultPtr retval(new GeneralResult(getResultConfig(). + LookupResultClass(resultClassID), + 0, 0, 0)); + const DocsumReply::Docsum & docsum = reply.docsums[id]; + // skip the 4 byte class id + ASSERT_EQUAL(0, retval->unpack(docsum.data.c_str() + 4, docsum.data.size() - 4)); + return retval; +} + + +bool +Test::assertString(const std::string & exp, const std::string & fieldName, + DocumentStoreAdapter &dsa, + uint32_t id) +{ + GeneralResultPtr res = getResult(dsa, id); + return EXPECT_EQUAL(exp, std::string(res->GetEntry(fieldName.c_str())-> + _stringval, + res->GetEntry(fieldName.c_str())-> + _stringlen)); +} + + +bool +Test::assertString(const std::string & exp, const std::string & fieldName, + const DocsumReply & reply, + uint32_t id, uint32_t resultClassID) +{ + GeneralResultPtr res = getResult(reply, id, resultClassID); + return EXPECT_EQUAL(exp, std::string(res->GetEntry(fieldName.c_str())-> + _stringval, + res->GetEntry(fieldName.c_str())-> + _stringlen)); +} + + +bool +Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id) +{ + const DocsumReply::Docsum & docsum = reply.docsums[id]; + uint32_t classId; + ASSERT_LESS_EQUAL(sizeof(classId), docsum.data.size()); + memcpy(&classId, docsum.data.c_str(), sizeof(classId)); + ASSERT_EQUAL(::search::fs4transport::SLIME_MAGIC_ID, classId); + vespalib::Slime slime; + vespalib::slime::Memory serialized(docsum.data.c_str() + sizeof(classId), + docsum.data.size() - sizeof(classId)); + size_t decodeRes = vespalib::slime::BinaryFormat::decode(serialized, + slime); + ASSERT_EQUAL(decodeRes, serialized.size); + vespalib::Slime expSlime; + size_t used = vespalib::slime::JsonFormat::decode(exp, expSlime); + EXPECT_EQUAL(exp.size(), used); + return EXPECT_EQUAL(expSlime, slime); +} + +void +Test::requireThatAdapterHandlesAllFieldTypes() +{ + Schema s; + s.addSummaryField(Schema::SummaryField("a", Schema::INT8)); + s.addSummaryField(Schema::SummaryField("b", Schema::INT16)); + s.addSummaryField(Schema::SummaryField("c", Schema::INT32)); + s.addSummaryField(Schema::SummaryField("d", Schema::INT64)); + s.addSummaryField(Schema::SummaryField("e", Schema::FLOAT)); + s.addSummaryField(Schema::SummaryField("f", Schema::DOUBLE)); + s.addSummaryField(Schema::SummaryField("g", Schema::STRING)); + s.addSummaryField(Schema::SummaryField("h", Schema::STRING)); + s.addSummaryField(Schema::SummaryField("i", Schema::RAW)); + s.addSummaryField(Schema::SummaryField("j", Schema::RAW)); + s.addSummaryField(Schema::SummaryField("k", Schema::STRING)); + s.addSummaryField(Schema::SummaryField("l", Schema::STRING)); + + BuildContext bc(s); + bc._bld.startDocument("doc::0"); + bc._bld.startSummaryField("a").addInt(255).endField(); + bc._bld.startSummaryField("b").addInt(32767).endField(); + bc._bld.startSummaryField("c").addInt(2147483647).endField(); + bc._bld.startSummaryField("d").addInt(2147483648).endField(); + bc._bld.startSummaryField("e").addFloat(1234.56).endField(); + bc._bld.startSummaryField("f").addFloat(9876.54).endField(); + bc._bld.startSummaryField("g").addStr("foo").endField(); + bc._bld.startSummaryField("h").addStr("bar").endField(); + bc._bld.startSummaryField("i").addStr("baz").endField(); + bc._bld.startSummaryField("j").addStr("qux").endField(); + bc._bld.startSummaryField("k").addStr("<foo>").endField(); + bc._bld.startSummaryField("l").addStr("{foo:10}").endField(); + bc.endDocument(0); + + DocumentStoreAdapter dsa(bc._str, + *bc._repo, + getResultConfig(), "class0", + bc.createFieldCacheRepo(getResultConfig())->getFieldCache("class0"), + getMarkupFields()); + GeneralResultPtr res = getResult(dsa, 0); + EXPECT_EQUAL(255u, res->GetEntry("a")->_intval); + EXPECT_EQUAL(32767u, res->GetEntry("b")->_intval); + EXPECT_EQUAL(2147483647u, res->GetEntry("c")->_intval); + EXPECT_EQUAL(2147483648u, res->GetEntry("d")->_int64val); + EXPECT_APPROX(1234.56, res->GetEntry("e")->_doubleval, 10e-5); + EXPECT_APPROX(9876.54, res->GetEntry("f")->_doubleval, 10e-5); + EXPECT_EQUAL("foo", std::string(res->GetEntry("g")->_stringval, + res->GetEntry("g")->_stringlen)); + EXPECT_EQUAL("bar", std::string(res->GetEntry("h")->_stringval, + res->GetEntry("h")->_stringlen)); + EXPECT_EQUAL("baz", std::string(res->GetEntry("i")->_dataval, + res->GetEntry("i")->_datalen)); + EXPECT_EQUAL("qux", std::string(res->GetEntry("j")->_dataval, + res->GetEntry("j")->_datalen)); + EXPECT_EQUAL("<foo>", std::string(res->GetEntry("k")->_stringval, + res->GetEntry("k")->_stringlen)); + EXPECT_EQUAL("{foo:10}", std::string(res->GetEntry("l")->_stringval, + res->GetEntry("l")->_stringlen)); +} + + +void +Test::requireThatAdapterHandlesMultipleDocuments() +{ + Schema s; + s.addSummaryField(Schema::SummaryField("a", Schema::INT32)); + + BuildContext bc(s); + bc._bld.startDocument("doc::0"). + startSummaryField("a"). + addInt(1000). + endField(); + bc.endDocument(0); + bc._bld.startDocument("doc::1"). + startSummaryField("a"). + addInt(2000).endField(); + bc.endDocument(1); + + DocumentStoreAdapter dsa(bc._str, *bc._repo, getResultConfig(), "class1", + bc.createFieldCacheRepo(getResultConfig())->getFieldCache("class1"), + getMarkupFields()); + { // doc 0 + GeneralResultPtr res = getResult(dsa, 0); + EXPECT_EQUAL(1000u, res->GetEntry("a")->_intval); + } + { // doc 1 + GeneralResultPtr res = getResult(dsa, 1); + EXPECT_EQUAL(2000u, res->GetEntry("a")->_intval); + } + { // doc 2 + DocsumStoreValue docsum = dsa.getMappedDocsum(2, false); + EXPECT_TRUE(docsum.pt() == NULL); + } + { // doc 0 (again) + GeneralResultPtr res = getResult(dsa, 0); + EXPECT_EQUAL(1000u, res->GetEntry("a")->_intval); + } + EXPECT_EQUAL(0u, bc._str.lastSyncToken()); + uint64_t flushToken = bc._str.initFlush(bc._serialNum - 1); + bc._str.flush(flushToken); +} + + +void +Test::requireThatAdapterHandlesDocumentIdField() +{ + Schema s; + s.addSummaryField(Schema::SummaryField("documentid", + Schema::STRING)); + BuildContext bc(s); + bc._bld.startDocument("doc::0"). + startSummaryField("documentid"). + addStr("foo"). + endField(); + bc.endDocument(0); + DocumentStoreAdapter dsa(bc._str, *bc._repo, getResultConfig(), "class4", + bc.createFieldCacheRepo(getResultConfig())->getFieldCache("class4"), + getMarkupFields()); + GeneralResultPtr res = getResult(dsa, 0); + EXPECT_EQUAL("doc::0", std::string(res->GetEntry("documentid")->_stringval, + res->GetEntry("documentid")->_stringlen)); +} + + +GlobalId gid1 = DocumentId("doc::1").getGlobalId(); // lid 1 +GlobalId gid2 = DocumentId("doc::2").getGlobalId(); // lid 2 +GlobalId gid3 = DocumentId("doc::3").getGlobalId(); // lid 3 +GlobalId gid4 = DocumentId("doc::4").getGlobalId(); // lid 4 +GlobalId gid9 = DocumentId("doc::9").getGlobalId(); // not existing + + +void +Test::requireThatDocsumRequestIsProcessed() +{ + Schema s; + s.addSummaryField(Schema::SummaryField("a", Schema::INT32)); + + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + dc.put(*bc._bld.startDocument("doc::1"). + startSummaryField("a"). + addInt(10). + endField(). + endDocument(), + 1); + dc.put(*bc._bld.startDocument("doc::2"). + startSummaryField("a"). + addInt(20). + endField(). + endDocument(), + 2); + dc.put(*bc._bld.startDocument("doc::3"). + startSummaryField("a"). + addInt(30). + endField(). + endDocument(), + 3); + dc.put(*bc._bld.startDocument("doc::4"). + startSummaryField("a"). + addInt(40). + endField(). + endDocument(), + 4); + dc.put(*bc._bld.startDocument("doc::5"). + startSummaryField("a"). + addInt(50). + endField(). + endDocument(), + 5); + + DocsumRequest req; + req.resultClassName = "class1"; + req.hits.push_back(DocsumRequest::Hit(gid2)); + req.hits.push_back(DocsumRequest::Hit(gid4)); + req.hits.push_back(DocsumRequest::Hit(gid9)); + DocsumReply::UP rep = dc._ddb->getDocsums(req); + EXPECT_EQUAL(3u, rep->docsums.size()); + EXPECT_EQUAL(2u, rep->docsums[0].docid); + EXPECT_EQUAL(gid2, rep->docsums[0].gid); + EXPECT_EQUAL(20u, getResult(*rep, 0, 1)->GetEntry("a")->_intval); + EXPECT_EQUAL(4u, rep->docsums[1].docid); + EXPECT_EQUAL(gid4, rep->docsums[1].gid); + EXPECT_EQUAL(40u, getResult(*rep, 1, 1)->GetEntry("a")->_intval); + EXPECT_EQUAL(search::endDocId, rep->docsums[2].docid); + EXPECT_EQUAL(gid9, rep->docsums[2].gid); + EXPECT_TRUE(rep->docsums[2].data.get() == NULL); +} + + +void +Test::requireThatRewritersAreUsed() +{ + Schema s; + s.addSummaryField(Schema::SummaryField("aa", Schema::INT32)); + s.addSummaryField(Schema::SummaryField("ab", Schema::INT32)); + + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + dc.put(*bc._bld.startDocument("doc::1"). + startSummaryField("aa"). + addInt(10). + endField(). + startSummaryField("ab"). + addInt(20). + endField(). + endDocument(), + 1); + + DocsumRequest req; + req.resultClassName = "class2"; + req.hits.push_back(DocsumRequest::Hit(gid1)); + DocsumReply::UP rep = dc._ddb->getDocsums(req); + EXPECT_EQUAL(1u, rep->docsums.size()); + EXPECT_EQUAL(20u, getResult(*rep, 0, 2)->GetEntry("aa")->_intval); + EXPECT_EQUAL(0u, getResult(*rep, 0, 2)->GetEntry("ab")->_intval); +} + + +void +addField(Schema & s, + const std::string &name, + Schema::DataType dtype, + Schema::CollectionType ctype) +{ + s.addSummaryField(Schema::SummaryField(name, dtype, ctype)); + s.addAttributeField(Schema::AttributeField(name, dtype, ctype)); +} + + +void +Test::requireThatAttributesAreUsed() +{ + Schema s; + addField(s, "ba", + Schema::INT32, Schema::SINGLE); + addField(s, "bb", + Schema::FLOAT, Schema::SINGLE); + addField(s, "bc", + Schema::STRING, Schema::SINGLE); + addField(s, "bd", + Schema::INT32, Schema::ARRAY); + addField(s, "be", + Schema::FLOAT, Schema::ARRAY); + addField(s, "bf", + Schema::STRING, Schema::ARRAY); + addField(s, "bg", + Schema::INT32, Schema::WEIGHTEDSET); + addField(s, "bh", + Schema::FLOAT, Schema::WEIGHTEDSET); + addField(s, "bi", + Schema::STRING, Schema::WEIGHTEDSET); + addField(s, "bj", Schema::TENSOR, Schema::SINGLE); + + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + dc.put(*bc._bld.startDocument("doc::1"). + endDocument(), + 1); // empty doc + dc.put(*bc._bld.startDocument("doc::2"). + startAttributeField("ba"). + addInt(10). + endField(). + startAttributeField("bb"). + addFloat(10.1). + endField(). + startAttributeField("bc"). + addStr("foo"). + endField(). + startAttributeField("bd"). + startElement(). + addInt(20). + endElement(). + startElement(). + addInt(30). + endElement(). + endField(). + startAttributeField("be"). + startElement(). + addFloat(20.2). + endElement(). + startElement(). + addFloat(30.3). + endElement(). + endField(). + startAttributeField("bf"). + startElement(). + addStr("bar"). + endElement(). + startElement(). + addStr("baz"). + endElement(). + endField(). + startAttributeField("bg"). + startElement(2). + addInt(40). + endElement(). + startElement(3). + addInt(50). + endElement(). + endField(). + startAttributeField("bh"). + startElement(4). + addFloat(40.4). + endElement(). + startElement(5). + addFloat(50.5). + endElement(). + endField(). + startAttributeField("bi"). + startElement(7). + addStr("quux"). + endElement(). + startElement(6). + addStr("qux"). + endElement(). + endField(). + startAttributeField("bj"). + addTensor(createTensor({ {{}, 3} }, { "x", "y"})). + endField(). + endDocument(), + 2); + dc.put(*bc._bld.startDocument("doc::3"). + endDocument(), + 3); // empty doc + + DocsumRequest req; + req.resultClassName = "class3"; + req.hits.push_back(DocsumRequest::Hit(gid2)); + req.hits.push_back(DocsumRequest::Hit(gid3)); + DocsumReply::UP rep = dc._ddb->getDocsums(req); + uint32_t rclass = 3; + + EXPECT_EQUAL(2u, rep->docsums.size()); + EXPECT_EQUAL(10u, getResult(*rep, 0, rclass)->GetEntry("ba")->_intval); + EXPECT_APPROX(10.1, getResult(*rep, 0, rclass)->GetEntry("bb")->_doubleval, + 10e-5); + EXPECT_TRUE(assertString("foo", "bc", *rep, 0, rclass)); + EXPECT_TRUE(assertString("[\"20\",\"30\"]", "bd", *rep, 0, rclass)); + EXPECT_TRUE(assertString("[\"20.2\",\"30.3\"]", "be", *rep, 0, rclass)); + EXPECT_TRUE(assertString("[\"bar\",\"baz\"]", "bf", *rep, 0, rclass)); + EXPECT_TRUE(assertString("[[\"40\",2],[\"50\",3]]", "bg", + *rep, 0, rclass)); + EXPECT_TRUE(assertString("[[\"40.4\",4],[\"50.5\",5]]", "bh", + *rep, 0, rclass)); + EXPECT_TRUE(assertString("[[\"quux\",7],[\"qux\",6]]", "bi", + *rep, 0, rclass)); + EXPECT_TRUE(assertString("{\"dimensions\":[\"x\",\"y\"]," + "\"cells\":[{\"address\":{},\"value\":3}]}", + "bj", *rep, 0, rclass)); + + // empty doc + EXPECT_TRUE(search::attribute::isUndefined<int32_t> + (getResult(*rep, 1, rclass)->GetEntry("ba")->_intval)); + EXPECT_TRUE(search::attribute::isUndefined<float> + (getResult(*rep, 1, rclass)->GetEntry("bb")->_doubleval)); + EXPECT_TRUE(assertString("", "bc", *rep, 1, rclass)); + EXPECT_TRUE(assertString("[]", "bd", *rep, 1, rclass)); + EXPECT_TRUE(assertString("[]", "be", *rep, 1, rclass)); + EXPECT_TRUE(assertString("[]", "bf", *rep, 1, rclass)); + EXPECT_TRUE(assertString("[]", "bg", *rep, 1, rclass)); + EXPECT_TRUE(assertString("[]", "bh", *rep, 1, rclass)); + EXPECT_TRUE(assertString("[]", "bi", *rep, 1, rclass)); + EXPECT_TRUE(assertString("", "bj", *rep, 1, rclass)); + + proton::IAttributeManager::SP attributeManager = + dc._ddb->getReadySubDB()->getAttributeManager(); + search::ISequencedTaskExecutor &attributeFieldWriter = + attributeManager->getAttributeFieldWriter(); + search::AttributeVector *bjAttr = + attributeManager->getWritableAttribute("bj"); + search::attribute::TensorAttribute *bjTensorAttr = + dynamic_cast<search::attribute::TensorAttribute *>(bjAttr); + + attributeFieldWriter. + execute("bj", + [&]() { bjTensorAttr->setTensor(3, + *createTensor({ {{}, 4} }, { "x"})); + bjTensorAttr->commit(); }); + attributeFieldWriter.sync(); + + DocsumReply::UP rep2 = dc._ddb->getDocsums(req); + EXPECT_TRUE(assertString("{\"dimensions\":[\"x\",\"y\"]," + "\"cells\":[{\"address\":{},\"value\":4}]}", + "bj", *rep2, 1, rclass)); + + DocsumRequest req3; + req3.resultClassName = "class3"; + req3._flags = ::search::fs4transport::GDFLAG_ALLOW_SLIME; + req3.hits.push_back(DocsumRequest::Hit(gid3)); + DocsumReply::UP rep3 = dc._ddb->getDocsums(req3); + + EXPECT_TRUE(assertSlime("{bd:[],be:[],bf:[],bg:[]," + "bh:[],bi:[]," + "bj:{dimensions:['x','y']," + "cells:[{address:{},value:4.0}]}}", + *rep3, 0)); +} + + +void +Test::requireThatSummaryAdapterHandlesPutAndRemove() +{ + Schema s; + s.addSummaryField(Schema::SummaryField("f1", + Schema::STRING, + Schema::SINGLE)); + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + Document::UP exp = bc._bld.startDocument("doc::1"). + startSummaryField("f1"). + addStr("foo"). + endField(). + endDocument(); + dc._sa->put(1, *exp, 1); + IDocumentStore & store = + dc._ddb->getReadySubDB()->getSummaryManager()->getBackingStore(); + Document::UP act = store.read(1, *bc._repo); + EXPECT_TRUE(act.get() != NULL); + EXPECT_EQUAL(exp->getType(), act->getType()); + EXPECT_EQUAL("foo", act->getValue("f1")->toString()); + dc._sa->remove(2, 1); + EXPECT_TRUE(store.read(1, *bc._repo).get() == NULL); +} + + +const std::string TERM_ORIG = "\357\277\271"; +const std::string TERM_INDEX = "\357\277\272"; +const std::string TERM_END = "\357\277\273"; +const std::string TERM_SEP = "\037"; +const std::string TERM_EMPTY = ""; +namespace +{ + const std::string empty; +} + +void +Test::requireThatAnnotationsAreUsed() +{ + Schema s; + s.addIndexField(Schema::IndexField("g", + Schema::STRING, + Schema::SINGLE)); + s.addSummaryField(Schema::SummaryField("g", + Schema::STRING, + Schema::SINGLE)); + s.addIndexField(Schema::IndexField("dynamicstring", + Schema::STRING, + Schema::SINGLE)); + s.addSummaryField(Schema::SummaryField("dynamicstring", + Schema::STRING, + Schema::SINGLE)); + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + Document::UP exp = bc._bld.startDocument("doc::0"). + startIndexField("g"). + addStr("foo"). + addStr("bar"). + addTermAnnotation("baz"). + endField(). + startIndexField("dynamicstring"). + setAutoAnnotate(false). + addStr("foo"). + addSpan(). + addAlphabeticTokenAnnotation(). + addTermAnnotation(). + addNoWordStr(" "). + addSpan(). + addSpaceTokenAnnotation(). + addStr("bar"). + addSpan(). + addAlphabeticTokenAnnotation(). + addTermAnnotation("baz"). + setAutoAnnotate(true). + endField(). + endDocument(); + dc._sa->put(1, *exp, 1); + + IDocumentStore & store = + dc._ddb->getReadySubDB()->getSummaryManager()->getBackingStore(); + Document::UP act = store.read(1, *bc._repo); + EXPECT_TRUE(act.get() != NULL); + EXPECT_EQUAL(exp->getType(), act->getType()); + EXPECT_EQUAL("foo bar", act->getValue("g")->getAsString()); + EXPECT_EQUAL("foo bar", act->getValue("dynamicstring")->getAsString()); + + DocumentStoreAdapter dsa(store, *bc._repo, getResultConfig(), "class0", + bc.createFieldCacheRepo(getResultConfig())->getFieldCache("class0"), + getMarkupFields()); + EXPECT_TRUE(assertString("foo bar", "g", dsa, 1)); + EXPECT_TRUE(assertString(TERM_EMPTY + "foo" + TERM_SEP + + " " + TERM_SEP + + TERM_ORIG + "bar" + TERM_INDEX + "baz" + TERM_END + + TERM_SEP, + "dynamicstring", dsa, 1)); +} + +void +Test::requireThatUrisAreUsed() +{ + Schema s; + s.addUriIndexFields(Schema::IndexField("urisingle", + Schema::STRING, + Schema::SINGLE)); + s.addSummaryField(Schema::SummaryField("urisingle", + Schema::STRING, + Schema::SINGLE)); + s.addUriIndexFields(Schema::IndexField("uriarray", + Schema::STRING, + Schema::ARRAY)); + s.addSummaryField(Schema::SummaryField("uriarray", + Schema::STRING, + Schema::ARRAY)); + s.addUriIndexFields(Schema::IndexField("uriwset", + Schema::STRING, + Schema::WEIGHTEDSET)); + s.addSummaryField(Schema::SummaryField("uriwset", + Schema::STRING, + Schema::WEIGHTEDSET)); + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + Document::UP exp = bc._bld.startDocument("doc::0"). + startIndexField("urisingle"). + startSubField("all"). + addUrlTokenizedString( + "http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(). + startIndexField("uriarray"). + startElement(1). + startSubField("all"). + addUrlTokenizedString( + "http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString( + "http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(). + startIndexField("uriwset"). + startElement(4). + startSubField("all"). + addUrlTokenizedString( + "http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString( + "http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). + endField(). + endDocument(); + dc._sa->put(1, *exp, 1); + + IDocumentStore & store = + dc._ddb->getReadySubDB()->getSummaryManager()->getBackingStore(); + Document::UP act = store.read(1, *bc._repo); + EXPECT_TRUE(act.get() != NULL); + EXPECT_EQUAL(exp->getType(), act->getType()); + + DocumentStoreAdapter dsa(store, *bc._repo, getResultConfig(), "class0", + bc.createFieldCacheRepo(getResultConfig())->getFieldCache("class0"), + getMarkupFields()); + EXPECT_TRUE(assertString("http://www.yahoo.com:81/fluke?ab=2#4", + "urisingle", dsa, 1)); + EXPECT_TRUE(assertString("[\"http://www.yahoo.com:82/fluke?ab=2#8\"," + "\"http://www.flickr.com:82/fluke?ab=2#9\"]", + "uriarray", dsa, 1)); + EXPECT_TRUE(assertString("[" + "{\"item\":\"http://www.yahoo.com:83/fluke?ab=2#12\",\"weight\":4}" + "," + "{\"item\":\"http://www.flickr.com:85/fluke?ab=2#13\",\"weight\":7}" + "]", + "uriwset", dsa, 1)); +} + + +void +Test::requireThatPositionsAreUsed() +{ + Schema s; + s.addAttributeField(Schema::AttributeField("sp2", + Schema::INT64)); + s.addAttributeField(Schema::AttributeField("ap2", + Schema::INT64, + Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("wp2", + Schema::INT64, + Schema::WEIGHTEDSET)); + + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + Document::UP exp = bc._bld.startDocument("doc::1"). + startAttributeField("sp2"). + addPosition(1002, 1003). + endField(). + startAttributeField("ap2"). + startElement().addPosition(1006, 1007).endElement(). + startElement().addPosition(1008, 1009).endElement(). + endField(). + startAttributeField("wp2"). + startElement(43).addPosition(1012, 1013).endElement(). + startElement(44).addPosition(1014, 1015).endElement(). + endField(). + endDocument(); + dc.put(*exp, 1); + + IDocumentStore & store = + dc._ddb->getReadySubDB()->getSummaryManager()->getBackingStore(); + Document::UP act = store.read(1, *bc._repo); + EXPECT_TRUE(act.get() != NULL); + EXPECT_EQUAL(exp->getType(), act->getType()); + + DocsumRequest req; + req.resultClassName = "class5"; + req.hits.push_back(DocsumRequest::Hit(gid1)); + DocsumReply::UP rep = dc._ddb->getDocsums(req); + uint32_t rclass = 5; + + EXPECT_EQUAL(1u, rep->docsums.size()); + EXPECT_EQUAL(1u, rep->docsums[0].docid); + EXPECT_EQUAL(gid1, rep->docsums[0].gid); + EXPECT_TRUE(assertString("1047758", + "sp2", *rep, 0, rclass)); + EXPECT_TRUE(assertString("<position x=\"1002\" y=\"1003\" latlong=\"N0.001003;E0.001002\" />", + "sp2x", *rep, 0, rclass)); + EXPECT_TRUE(assertString("[1047806,1048322]", + "ap2", *rep, 0, rclass)); + EXPECT_TRUE(assertString("<position x=\"1006\" y=\"1007\" latlong=\"N0.001007;E0.001006\" />" + "<position x=\"1008\" y=\"1009\" latlong=\"N0.001009;E0.001008\" />", + "ap2x", *rep, 0, rclass)); + EXPECT_TRUE(assertString("[{\"item\":1048370,\"weight\":43},{\"item\":1048382,\"weight\":44}]", + "wp2", *rep, 0, rclass)); + EXPECT_TRUE(assertString("<position x=\"1012\" y=\"1013\" latlong=\"N0.001013;E0.001012\" />" + "<position x=\"1014\" y=\"1015\" latlong=\"N0.001015;E0.001014\" />", + "wp2x", *rep, 0, rclass)); +} + + +void +Test::requireThatRawFieldsWorks() +{ + Schema s; + s.addSummaryField(Schema::AttributeField("i", + Schema::RAW)); + s.addSummaryField(Schema::AttributeField("araw", + Schema::RAW, + Schema::ARRAY)); + s.addSummaryField(Schema::AttributeField("wraw", + Schema::RAW, + Schema::WEIGHTEDSET)); + + std::vector<char> binaryBlob; + binaryBlob.push_back('\0'); + binaryBlob.push_back('\2'); + binaryBlob.push_back('\1'); + std::string raw1s("Single Raw Element"); + std::string raw1a0("Array Raw Element 0"); + std::string raw1a1("Array Raw Element 1"); + std::string raw1w0("Weighted Set Raw Element 0"); + std::string raw1w1("Weighted Set Raw Element 1"); + raw1s += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1a0 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1a1 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1w0 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1w1 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + + BuildContext bc(s); + DBContext dc(bc._repo, getDocTypeName()); + Document::UP exp = bc._bld.startDocument("doc::0"). + startSummaryField("i"). + addRaw(raw1s.c_str(), raw1s.size()). + endField(). + startSummaryField("araw"). + startElement(). + addRaw(raw1a0.c_str(), raw1a0.size()). + endElement(). + startElement(). + addRaw(raw1a1.c_str(), raw1a1.size()). + endElement(). + endField(). + startSummaryField("wraw"). + startElement(46). + addRaw(raw1w1.c_str(), raw1w1.size()). + endElement(). + startElement(45). + addRaw(raw1w0.c_str(), raw1w0.size()). + endElement(). + endField(). + endDocument(); + dc._sa->put(1, *exp, 1); + + IDocumentStore & store = + dc._ddb->getReadySubDB()->getSummaryManager()->getBackingStore(); + Document::UP act = store.read(1, *bc._repo); + EXPECT_TRUE(act.get() != NULL); + EXPECT_EQUAL(exp->getType(), act->getType()); + + DocumentStoreAdapter dsa(store, *bc._repo, getResultConfig(), "class0", + bc.createFieldCacheRepo(getResultConfig())->getFieldCache("class0"), + getMarkupFields()); + + ASSERT_TRUE(assertString(raw1s, + "i", dsa, 1)); + ASSERT_TRUE(assertString(empty + "[\"" + + vespalib::Base64::encode(raw1a0) + + "\",\"" + + vespalib::Base64::encode(raw1a1) + + "\"]", + "araw", dsa, 1)); + ASSERT_TRUE(assertString(empty + "[{\"item\":\"" + + vespalib::Base64::encode(raw1w1) + + "\",\"weight\":46},{\"item\":\"" + + vespalib::Base64::encode(raw1w0) + + "\",\"weight\":45}]", + "wraw", dsa, 1)); +} + + +void +Test::requireThatFieldCacheRepoCanReturnDefaultFieldCache() +{ + Schema s; + s.addSummaryField(Schema::SummaryField("a", Schema::INT32)); + BuildContext bc(s); + FieldCacheRepo::UP repo = bc.createFieldCacheRepo(getResultConfig()); + FieldCache::CSP cache = repo->getFieldCache(""); + EXPECT_TRUE(cache.get() == repo->getFieldCache("class1").get()); + EXPECT_EQUAL(1u, cache->size()); + EXPECT_EQUAL("a", cache->getField(0)->getName()); +} + + +Test::Test() + : _summaryCfg(), + _resultCfg(), + _markupFields() +{ + std::string cfgId("summary"); + _summaryCfg = config::ConfigGetter<vespa::config::search::SummaryConfig>::getConfig(cfgId, config::FileSpec("summary.cfg")); + _resultCfg.ReadConfig(*_summaryCfg, cfgId.c_str()); + std::string mapCfgId("summarymap"); + std::unique_ptr<vespa::config::search::SummarymapConfig> mapCfg = config::ConfigGetter<vespa::config::search::SummarymapConfig>::getConfig(mapCfgId, config::FileSpec("summarymap.cfg")); + for (size_t i = 0; i < mapCfg->override.size(); ++i) { + const vespa::config::search::SummarymapConfig::Override & o = mapCfg->override[i]; + if (o.command == "dynamicteaser") { + vespalib::string markupField = o.arguments; + if (markupField.empty()) + continue; + // Assume just one argument: source field that must contain markup + _markupFields.insert(markupField); + LOG(info, + "Field %s has markup", + markupField.c_str()); + } + } +} + + +int +Test::Main() +{ + TEST_INIT("docsummary_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + TEST_DO(requireThatSummaryAdapterHandlesPutAndRemove()); + TEST_DO(requireThatAdapterHandlesAllFieldTypes()); + TEST_DO(requireThatAdapterHandlesMultipleDocuments()); + TEST_DO(requireThatAdapterHandlesDocumentIdField()); + TEST_DO(requireThatDocsumRequestIsProcessed()); + TEST_DO(requireThatRewritersAreUsed()); + TEST_DO(requireThatAttributesAreUsed()); + TEST_DO(requireThatAnnotationsAreUsed()); + TEST_DO(requireThatUrisAreUsed()); + TEST_DO(requireThatPositionsAreUsed()); + TEST_DO(requireThatRawFieldsWorks()); + TEST_DO(requireThatFieldCacheRepoCanReturnDefaultFieldCache()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(proton::Test); diff --git a/searchcore/src/tests/proton/docsummary/docsummary_test.sh b/searchcore/src/tests/proton/docsummary/docsummary_test.sh new file mode 100755 index 00000000000..4871911e1cd --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/docsummary_test.sh @@ -0,0 +1,15 @@ +#!/bin/bash +rm -rf tmp +rm -rf tmpdb +rm -rf summary +rm -rf indexingdocument +rm -rf searchdocument +rm -rf *.dat +$VALGRIND ./searchcore_docsummary_test_app +rm -rf tmp +rm -rf tmpdb +rm -rf summary +rm -rf indexingdocument +rm -rf searchdocument +rm -rf *.dat +$VALGRIND ./searchcore_summaryfieldconverter_test_app diff --git a/searchcore/src/tests/proton/docsummary/documentmanager.cfg b/searchcore/src/tests/proton/docsummary/documentmanager.cfg new file mode 100644 index 00000000000..91c69cc0c70 --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/documentmanager.cfg @@ -0,0 +1,81 @@ +enablecompression false +datatype[6] +datatype[0].id -1636745577 +datatype[0].arraytype[0] +datatype[0].weightedsettype[0] +datatype[0].structtype[1] +datatype[0].structtype[0].name typea.header +datatype[0].structtype[0].version 0 +datatype[0].structtype[0].field[4] +datatype[0].structtype[0].field[0].name floatfield +datatype[0].structtype[0].field[0].id[0] +datatype[0].structtype[0].field[0].datatype 1 +datatype[0].structtype[0].field[1].name stringfield +datatype[0].structtype[0].field[1].id[0] +datatype[0].structtype[0].field[1].datatype 2 +datatype[0].structtype[0].field[2].name longfield +datatype[0].structtype[0].field[2].id[0] +datatype[0].structtype[0].field[2].datatype 4 +datatype[0].structtype[0].field[3].name urifield +datatype[0].structtype[0].field[3].id[0] +datatype[0].structtype[0].field[3].datatype 10 +datatype[0].documenttype[0] +datatype[1].id 1878320748 +datatype[1].arraytype[0] +datatype[1].weightedsettype[0] +datatype[1].structtype[1] +datatype[1].structtype[0].name typea.body +datatype[1].structtype[0].version 0 +datatype[1].structtype[0].field[4] +datatype[1].structtype[0].field[0].name intfield +datatype[1].structtype[0].field[0].id[0] +datatype[1].structtype[0].field[0].datatype 0 +datatype[1].structtype[0].field[1].name rawfield +datatype[1].structtype[0].field[1].id[0] +datatype[1].structtype[0].field[1].datatype 3 +datatype[1].structtype[0].field[2].name doublefield +datatype[1].structtype[0].field[2].id[0] +datatype[1].structtype[0].field[2].datatype 5 +datatype[1].structtype[0].field[3].name bytefield +datatype[1].structtype[0].field[3].id[0] +datatype[1].structtype[0].field[3].datatype 16 +datatype[1].documenttype[0] +datatype[2].id -1175657560 +datatype[2].arraytype[0] +datatype[2].weightedsettype[0] +datatype[2].structtype[0] +datatype[2].documenttype[1] +datatype[2].documenttype[0].name typea +datatype[2].documenttype[0].version 0 +datatype[2].documenttype[0].inherits[0] +datatype[2].documenttype[0].headerstruct -1636745577 +datatype[2].documenttype[0].bodystruct 1878320748 +datatype[3].id 192273965 +datatype[3].arraytype[0] +datatype[3].weightedsettype[0] +datatype[3].structtype[1] +datatype[3].structtype[0].name typeb.header +datatype[3].structtype[0].version 0 +datatype[3].structtype[0].field[0] +datatype[3].documenttype[0] +datatype[4].id -72846462 +datatype[4].arraytype[0] +datatype[4].weightedsettype[0] +datatype[4].structtype[1] +datatype[4].structtype[0].name typeb.body +datatype[4].structtype[0].version 0 +datatype[4].structtype[0].field[1] +datatype[4].structtype[0].field[0].name intfield +datatype[4].structtype[0].field[0].id[0] +datatype[4].structtype[0].field[0].datatype 0 +datatype[4].documenttype[0] +datatype[5].id -1146158894 +datatype[5].arraytype[0] +datatype[5].weightedsettype[0] +datatype[5].structtype[0] +datatype[5].documenttype[1] +datatype[5].documenttype[0].name typeb +datatype[5].documenttype[0].version 0 +datatype[5].documenttype[0].inherits[0] +datatype[5].documenttype[0].headerstruct 192273965 +datatype[5].documenttype[0].bodystruct -72846462 diff --git a/searchcore/src/tests/proton/docsummary/indexingdocument.cfg b/searchcore/src/tests/proton/docsummary/indexingdocument.cfg new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/indexingdocument.cfg diff --git a/searchcore/src/tests/proton/docsummary/indexschema.cfg b/searchcore/src/tests/proton/docsummary/indexschema.cfg new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/indexschema.cfg diff --git a/searchcore/src/tests/proton/docsummary/juniperrc.cfg b/searchcore/src/tests/proton/docsummary/juniperrc.cfg new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/juniperrc.cfg diff --git a/searchcore/src/tests/proton/docsummary/rank-profiles.cfg b/searchcore/src/tests/proton/docsummary/rank-profiles.cfg new file mode 100644 index 00000000000..34d8f0245df --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/rank-profiles.cfg @@ -0,0 +1,2 @@ +rankprofile[1] +rankprofile[0].name default diff --git a/searchcore/src/tests/proton/docsummary/summary.cfg b/searchcore/src/tests/proton/docsummary/summary.cfg new file mode 100644 index 00000000000..52f300ae3e0 --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/summary.cfg @@ -0,0 +1,108 @@ +defaultsummaryid 1 +classes[6] +classes[0].name "class0" +classes[0].id 0 +classes[0].fields[24] +classes[0].fields[0].name "a" +classes[0].fields[0].type "byte" +classes[0].fields[1].name "b" +classes[0].fields[1].type "short" +classes[0].fields[2].name "c" +classes[0].fields[2].type "integer" +classes[0].fields[3].name "d" +classes[0].fields[3].type "int64" +classes[0].fields[4].name "e" +classes[0].fields[4].type "float" +classes[0].fields[5].name "f" +classes[0].fields[5].type "double" +classes[0].fields[6].name "g" +classes[0].fields[6].type "string" +classes[0].fields[7].name "h" +classes[0].fields[7].type "longstring" +classes[0].fields[8].name "i" +classes[0].fields[8].type "data" +classes[0].fields[9].name "j" +classes[0].fields[9].type "longdata" +classes[0].fields[10].name "k" +classes[0].fields[10].type "xmlstring" +classes[0].fields[11].name "l" +classes[0].fields[11].type "jsonstring" +classes[0].fields[12].name "dynamicstring" +classes[0].fields[12].type "string" +classes[0].fields[13].name "urisingle" +classes[0].fields[13].type "string" +classes[0].fields[14].name "uriarray" +classes[0].fields[14].type "jsonstring" +classes[0].fields[15].name "uriwset" +classes[0].fields[15].type "jsonstring" +classes[0].fields[16].name "sp1" +classes[0].fields[16].type "string" +classes[0].fields[17].name "sp2" +classes[0].fields[17].type "string" +classes[0].fields[18].name "ap1" +classes[0].fields[18].type "jsonstring" +classes[0].fields[19].name "ap2" +classes[0].fields[19].type "jsonstring" +classes[0].fields[20].name "wp1" +classes[0].fields[20].type "jsonstring" +classes[0].fields[21].name "wp2" +classes[0].fields[21].type "jsonstring" +classes[0].fields[22].name "araw" +classes[0].fields[22].type "jsonstring" +classes[0].fields[23].name "wraw" +classes[0].fields[23].type "jsonstring" +classes[1].name "class1" +classes[1].id 1 +classes[1].fields[1] +classes[1].fields[0].name "a" +classes[1].fields[0].type "integer" +classes[2].name "class2" +classes[2].id 2 +classes[2].fields[2] +classes[2].fields[0].name "aa" +classes[2].fields[0].type "integer" +classes[2].fields[1].name "ab" +classes[2].fields[1].type "integer" +classes[3].name "class3" +classes[3].id 3 +classes[3].fields[10] +classes[3].fields[0].name "ba" +classes[3].fields[0].type "integer" +classes[3].fields[1].name "bb" +classes[3].fields[1].type "float" +classes[3].fields[2].name "bc" +classes[3].fields[2].type "longstring" +classes[3].fields[3].name "bd" +classes[3].fields[3].type "jsonstring" +classes[3].fields[4].name "be" +classes[3].fields[4].type "jsonstring" +classes[3].fields[5].name "bf" +classes[3].fields[5].type "jsonstring" +classes[3].fields[6].name "bg" +classes[3].fields[6].type "jsonstring" +classes[3].fields[7].name "bh" +classes[3].fields[7].type "jsonstring" +classes[3].fields[8].name "bi" +classes[3].fields[8].type "jsonstring" +classes[3].fields[9].name "bj" +classes[3].fields[9].type "jsonstring" +classes[4].name "class4" +classes[4].id 4 +classes[4].fields[1] +classes[4].fields[0].name "documentid" +classes[4].fields[0].type "longstring" +classes[5].id 5 +classes[5].name "class5" +classes[5].fields[6] +classes[5].fields[0].name "sp2" +classes[5].fields[0].type "string" +classes[5].fields[1].name "sp2x" +classes[5].fields[1].type "xmlstring" +classes[5].fields[2].name "ap2" +classes[5].fields[2].type "jsonstring" +classes[5].fields[3].name "ap2x" +classes[5].fields[3].type "xmlstring" +classes[5].fields[4].name "wp2" +classes[5].fields[4].type "jsonstring" +classes[5].fields[5].name "wp2x" +classes[5].fields[5].type "xmlstring" diff --git a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp new file mode 100644 index 00000000000..f2e5f1a508b --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp @@ -0,0 +1,713 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for summaryfieldconverter. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("summaryfieldconverter_test"); + +#include <vespa/document/annotation/annotation.h> +#include <vespa/document/annotation/span.h> +#include <vespa/document/annotation/spanlist.h> +#include <vespa/document/annotation/spantree.h> +#include <vespa/document/base/documentid.h> +#include <vespa/document/base/exceptions.h> +#include <vespa/document/base/field.h> +#include <vespa/document/datatype/annotationtype.h> +#include <vespa/document/datatype/arraydatatype.h> +#include <vespa/document/datatype/datatype.h> +#include <vespa/document/datatype/documenttype.h> +#include <vespa/document/datatype/structdatatype.h> +#include <vespa/document/datatype/urldatatype.h> +#include <vespa/document/datatype/weightedsetdatatype.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/bytefieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/doublefieldvalue.h> +#include <vespa/document/fieldvalue/floatfieldvalue.h> +#include <vespa/document/fieldvalue/intfieldvalue.h> +#include <vespa/document/fieldvalue/longfieldvalue.h> +#include <vespa/document/fieldvalue/predicatefieldvalue.h> +#include <vespa/document/fieldvalue/rawfieldvalue.h> +#include <vespa/document/fieldvalue/shortfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/predicate/predicate.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/searchcore/proton/docsummary/summaryfieldconverter.h> +#include <vespa/searchcore/proton/docsummary/linguisticsannotation.h> +#include <vespa/searchcore/proton/docsummary/searchdatatype.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/config-summarymap.h> +#include <vespa/vespalib/geo/zcurve.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/data/slime/json_format.h> +#include <vespa/vespalib/data/slime/binary_format.h> +#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/types.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <vespa/vespalib/tensor/tensor_factory.h> + +using vespa::config::search::SummarymapConfig; +using vespa::config::search::SummarymapConfigBuilder; +using document::Annotation; +using document::AnnotationType; +using document::ArrayDataType; +using document::ArrayFieldValue; +using document::ByteFieldValue; +using document::DataType; +using document::Document; +using document::DocumenttypesConfig; +using document::DocumenttypesConfigBuilder; +using document::DocumentId; +using document::DocumentType; +using document::DocumentTypeRepo; +using document::DoubleFieldValue; +using document::FeatureSet; +using document::Field; +using document::FieldNotFoundException; +using document::FieldValue; +using document::FloatFieldValue; +using document::IntFieldValue; +using document::LongFieldValue; +using document::Predicate; +using document::PredicateFieldValue; +using document::RawFieldValue; +using document::ShortFieldValue; +using document::Span; +using document::SpanList; +using document::SpanTree; +using document::StringFieldValue; +using document::StructDataType; +using document::StructFieldValue; +using document::UrlDataType; +using document::WeightedSetDataType; +using document::WeightedSetFieldValue; +using document::TensorFieldValue; +using search::index::Schema; +using vespalib::Slime; +using vespalib::slime::Cursor; +using vespalib::string; +using namespace proton; +using namespace proton::linguistics; +using vespalib::geo::ZCurve; +using vespalib::tensor::Tensor; +using vespalib::tensor::TensorCells; +using vespalib::tensor::TensorDimensions; + +typedef SummaryFieldConverter SFC; + +namespace { + +struct FieldBlock { + vespalib::string input; + Slime slime; + search::RawBuf binary; + vespalib::string json; + + explicit FieldBlock(const vespalib::string &jsonInput) + : input(jsonInput), slime(), binary(1024), json() + { + size_t used = vespalib::slime::JsonFormat::decode(jsonInput, slime); + EXPECT_EQUAL(jsonInput.size(), used); + { + search::SlimeOutputRawBufAdapter adapter(binary); + vespalib::slime::JsonFormat::encode(slime, adapter, true); + json.assign(binary.GetDrainPos(), binary.GetUsedLen()); + binary.reset(); + } + search::SlimeOutputRawBufAdapter adapter(binary); + vespalib::slime::BinaryFormat::encode(slime, adapter); + } +}; + +class Test : public vespalib::TestApp { + std::unique_ptr<Schema> _schema; + std::unique_ptr<SummarymapConfigBuilder> _summarymap; + DocumentTypeRepo::SP _documentRepo; + const DocumentType *_documentType; + document::FixedTypeRepo _fixedRepo; + + void setUp(); + void tearDown(); + + const DataType &getDataType(const string &name) const; + + template <typename T> + T getValueAs(const string &field_name, const Document &doc); + + template <typename T> + T + cvtValueAs(const FieldValue::UP &fv); + + template <typename T> + T + cvtAttributeAs(const FieldValue::UP &fv); + + template <typename T> + T + cvtSummaryAs(bool markup, const FieldValue::UP &fv); + + void checkString(const string &str, const FieldValue *value); + void checkData(const search::RawBuf &data, const FieldValue *value); + void checkArray(const string &str, const FieldValue *value); + template <unsigned int N> + void checkArray(const char *(&str)[N], const FieldValue *value); + Document getDoc(const string &name, const Document *doc); + void setIndexField(const string &name); + void setSummaryField(const string &name); + void setAttributeField(const string &name); + + void requireThatSummaryIsAnUnmodifiedString(); + void requireThatAttributeIsAnUnmodifiedString(); + void requireThatArrayIsFlattenedInSummaryField(); + void requireThatWeightedSetIsFlattenedInSummaryField(); + void requireThatPositionsAreTransformedInSummary(); + void requireThatArrayIsPreservedInAttributeField(); + void requireThatPositionsAreTransformedInAttributeField(); + void requireThatPositionArrayIsTransformedInAttributeField(); + void requireThatPositionWeightedSetIsTransformedInAttributeField(); + void requireThatAttributeCanBePrimitiveTypes(); + void requireThatSummaryCanBePrimitiveTypes(); + void requireThatSummaryHandlesCjk(); + void requireThatSearchDataTypeUsesDefaultDataTypes(); + void requireThatLinguisticsAnnotationUsesDefaultDataTypes(); + void requireThatPredicateIsPrinted(); + void requireThatTensorIsPrinted(); + const DocumentType &getDocType() const { return *_documentType; } + Document makeDocument(); + StringFieldValue annotateTerm(const string &term); + StringFieldValue makeAnnotatedChineseString(); + StringFieldValue makeAnnotatedString(); + void setSpanTree(StringFieldValue & value, SpanTree::UP tree); +public: + Test(); + int Main(); +}; + +DocumenttypesConfig getDocumenttypesConfig() { + using namespace document::config_builder; + DocumenttypesConfigBuilderHelper builder; + builder.document(42, "indexingdocument", + Struct("indexingdocument.header") + .addField("empty", DataType::T_STRING) + .addField("string", DataType::T_STRING) + .addField("plain_string", DataType::T_STRING) + .addField("string_array", Array(DataType::T_STRING)) + .addField("string_wset", Wset(DataType::T_STRING)) + .addField("position1", DataType::T_INT) + .addField("position2", DataType::T_LONG) + .addField("position2_array", Array(DataType::T_LONG)) + .addField("position2_wset", Wset(DataType::T_LONG)) + .addField("uri", UrlDataType::getInstance().getId()) + .addField("uri_array", + Array(UrlDataType::getInstance().getId())) + .addField("int", DataType::T_INT) + .addField("long", DataType::T_LONG) + .addField("short", DataType::T_SHORT) + .addField("byte", DataType::T_BYTE) + .addField("double", DataType::T_DOUBLE) + .addField("float", DataType::T_FLOAT) + .addField("chinese", DataType::T_STRING) + .addField("predicate", DataType::T_PREDICATE) + .addField("tensor", DataType::T_TENSOR), + Struct("indexingdocument.body")); + return builder.config(); +} + +Test::Test() : + _documentRepo(new DocumentTypeRepo(getDocumenttypesConfig())), + _documentType(_documentRepo->getDocumentType("indexingdocument")), + _fixedRepo(*_documentRepo, *_documentType) +{ + ASSERT_TRUE(_documentType); +} + +#define TEST_CALL(func) \ + TEST_DO(setUp()); \ + TEST_DO(func); \ + TEST_DO(tearDown()) + +int +Test::Main() +{ + TEST_INIT("summaryfieldconverter_test"); + + TEST_CALL(requireThatSummaryIsAnUnmodifiedString()); + TEST_CALL(requireThatAttributeIsAnUnmodifiedString()); + TEST_CALL(requireThatArrayIsFlattenedInSummaryField()); + TEST_CALL(requireThatWeightedSetIsFlattenedInSummaryField()); + TEST_CALL(requireThatPositionsAreTransformedInSummary()); + TEST_CALL(requireThatArrayIsPreservedInAttributeField()); + TEST_CALL(requireThatPositionsAreTransformedInAttributeField()); + TEST_CALL(requireThatPositionArrayIsTransformedInAttributeField()); + TEST_CALL(requireThatPositionWeightedSetIsTransformedInAttributeField()); + TEST_CALL(requireThatAttributeCanBePrimitiveTypes()); + TEST_CALL(requireThatSummaryCanBePrimitiveTypes()); + TEST_CALL(requireThatSummaryHandlesCjk()); + TEST_CALL(requireThatSearchDataTypeUsesDefaultDataTypes()); + TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes()); + TEST_CALL(requireThatPredicateIsPrinted()); + TEST_CALL(requireThatTensorIsPrinted()); + + TEST_DONE(); +} + +void Test::setUp() { + _schema.reset(new Schema); + _summarymap.reset(new SummarymapConfigBuilder); +} + +void Test::tearDown() { +} + +const DataType &Test::getDataType(const string &name) const { + const DataType *type = _documentRepo->getDataType(*_documentType, name); + ASSERT_TRUE(type); + return *type; +} + +template <typename T> +std::unique_ptr<T> makeUP(T *p) { return std::unique_ptr<T>(p); } + +StringFieldValue Test::makeAnnotatedString() { + SpanList *span_list = new SpanList; + SpanTree::UP tree(new SpanTree(SPANTREE_NAME, makeUP(span_list))); + // Annotations don't have to be added sequentially. + tree->annotate(span_list->add(makeUP(new Span(8, 3))), + makeUP(new Annotation(*TERM, + makeUP(new StringFieldValue( + "Annotation"))))); + tree->annotate(span_list->add(makeUP(new Span(0, 3))), *TERM); + tree->annotate(span_list->add(makeUP(new Span(4, 3))), *TERM); + tree->annotate(span_list->add(makeUP(new Span(4, 3))), + makeUP(new Annotation(*TERM, + makeUP(new StringFieldValue( + "Multiple"))))); + tree->annotate(span_list->add(makeUP(new Span(1, 2))), + makeUP(new Annotation(*TERM, + makeUP(new StringFieldValue( + "Overlap"))))); + StringFieldValue value("Foo Bar Baz"); + setSpanTree(value, std::move(tree)); + return value; +} + +StringFieldValue Test::annotateTerm(const string &term) { + SpanTree::UP tree(new SpanTree(SPANTREE_NAME, makeUP(new Span(0, term.size())))); + tree->annotate(tree->getRoot(), *TERM); + StringFieldValue value(term); + setSpanTree(value, std::move(tree)); + return value; +} + +void Test::setSpanTree(StringFieldValue & value, SpanTree::UP tree) { + StringFieldValue::SpanTrees trees; + trees.push_back(std::move(tree)); + value.setSpanTrees(trees, _fixedRepo); +} + +StringFieldValue Test::makeAnnotatedChineseString() { + SpanList *span_list = new SpanList; + SpanTree::UP tree(new SpanTree(SPANTREE_NAME, makeUP(span_list))); + // These chinese characters each use 3 bytes in their UTF8 encoding. + tree->annotate(span_list->add(makeUP(new Span(0, 15))), *TERM); + tree->annotate(span_list->add(makeUP(new Span(15, 9))), *TERM); + StringFieldValue value("我就是那个大灰狼"); + setSpanTree(value, std::move(tree)); + return value; +} + +Document Test::makeDocument() { + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + doc.setValue("string", makeAnnotatedString()); + + doc.setValue("plain_string", StringFieldValue("Plain")); + + ArrayFieldValue array(getDataType("Array<String>")); + array.add(annotateTerm("\"foO\"")); + array.add(annotateTerm("ba\\R")); + doc.setValue("string_array", array); + + WeightedSetFieldValue wset(getDataType("WeightedSet<String>")); + wset.add(annotateTerm("\"foo\""), 2); + wset.add(annotateTerm("ba\\r"), 4); + doc.setValue("string_wset", wset); + + doc.setValue("position1", IntFieldValue(5)); + + doc.setValue("position2", LongFieldValue(ZCurve::encode(4, 2))); + + StructFieldValue uri(getDataType("url")); + uri.setValue("all", annotateTerm("http://www.yahoo.com:42/foobar?q#frag")); + uri.setValue("scheme", annotateTerm("http")); + uri.setValue("host", annotateTerm("www.yahoo.com")); + uri.setValue("port", annotateTerm("42")); + uri.setValue("path", annotateTerm("foobar")); + uri.setValue("query", annotateTerm("q")); + uri.setValue("fragment", annotateTerm("frag")); + doc.setValue("uri", uri); + + ArrayFieldValue uri_array(getDataType("Array<url>")); + uri.setValue("all", annotateTerm("http://www.yahoo.com:80/foobar?q#frag")); + uri.setValue("port", annotateTerm("80")); + uri_array.add(uri); + uri.setValue("all", annotateTerm("https://www.yahoo.com:443/foo?q#frag")); + uri.setValue("scheme", annotateTerm("https")); + uri.setValue("path", annotateTerm("foo")); + uri.setValue("port", annotateTerm("443")); + uri_array.add(uri); + doc.setValue("uri_array", uri_array); + + ArrayFieldValue position2_array(getDataType("Array<Long>")); + position2_array.add(LongFieldValue(ZCurve::encode(4, 2))); + position2_array.add(LongFieldValue(ZCurve::encode(4, 4))); + doc.setValue("position2_array", position2_array); + + WeightedSetFieldValue position2_wset(getDataType("WeightedSet<Long>")); + position2_wset.add(LongFieldValue(ZCurve::encode(4, 2)), 4); + position2_wset.add(LongFieldValue(ZCurve::encode(4, 4)), 2); + doc.setValue("position2_wset", position2_wset); + + doc.setValue("int", IntFieldValue(42)); + doc.setValue("long", LongFieldValue(84)); + doc.setValue("short", ShortFieldValue(21)); + doc.setValue("byte", ByteFieldValue(11)); + doc.setValue("double", DoubleFieldValue(0.4)); + doc.setValue("float", FloatFieldValue(0.2f)); + + doc.setValue("chinese", makeAnnotatedChineseString()); + return doc; +} + +template <typename T> +T Test::getValueAs(const string &field_name, const Document &doc) { + FieldValue::UP fv(doc.getValue(field_name)); + const T *value = dynamic_cast<const T *>(fv.get()); + ASSERT_TRUE(value); + return *value; +} + +template <typename T> +T +Test::cvtValueAs(const FieldValue::UP &fv) +{ + ASSERT_TRUE(fv.get() != NULL); + const T *value = dynamic_cast<const T *>(fv.get()); + ASSERT_TRUE(value); + return *value; +} + +template <typename T> +T +Test::cvtAttributeAs(const FieldValue::UP &fv) +{ + ASSERT_TRUE(fv.get() != NULL); + return cvtValueAs<T>(fv); +} + +template <typename T> +T +Test::cvtSummaryAs(bool markup, const FieldValue::UP &fv) +{ + ASSERT_TRUE(fv.get() != NULL); + FieldValue::UP r = SFC::convertSummaryField(markup, *fv, false); + return cvtValueAs<T>(r); +} + +void Test::checkString(const string &str, const FieldValue *value) { + ASSERT_TRUE(value); + const StringFieldValue *s = dynamic_cast<const StringFieldValue *>(value); + ASSERT_TRUE(s); + // fprintf(stderr, ">>>%s<<< >>>%s<<<\n", str.c_str(), s->getValue().c_str()); + EXPECT_EQUAL(str, s->getValue()); +} + +void Test::checkData(const search::RawBuf &buf, const FieldValue *value) { + ASSERT_TRUE(value); + const RawFieldValue *s = dynamic_cast<const RawFieldValue *>(value); + ASSERT_TRUE(s); + auto got = s->getAsRaw(); + EXPECT_EQUAL(buf.GetUsedLen(), got.second); + EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0); +} + +void Test::checkArray(const string &str, const FieldValue *value) { + ASSERT_TRUE(value); + const ArrayFieldValue *a = dynamic_cast<const ArrayFieldValue *>(value); + ASSERT_TRUE(a); + EXPECT_EQUAL(1u, a->size()); + checkString(str, &(*a)[0]); +} + +template <unsigned int N> +void Test::checkArray(const char *(&str)[N], const FieldValue *value) { + ASSERT_TRUE(value); + const ArrayFieldValue *a = dynamic_cast<const ArrayFieldValue *>(value); + ASSERT_TRUE(a); + EXPECT_EQUAL(N, a->size()); + for (size_t i = 0; i < a->size() && i < N; ++i) { + checkString(str[i], &(*a)[i]); + } +} + +Document Test::getDoc(const string &name, const Document *doc) { + ASSERT_TRUE(doc); + return getValueAs<Document>(name, *doc); +} + +void Test::setIndexField(const string &field) { + _schema->addIndexField( + Schema::IndexField(field, Schema::STRING)); +} + +void Test::setSummaryField(const string &field) { + _schema->addSummaryField(Schema::Field(field, Schema::STRING)); +} + +void Test::setAttributeField(const string &field) { + _schema->addAttributeField(Schema::Field(field, Schema::STRING)); +} + +void Test::requireThatSummaryIsAnUnmodifiedString() { + setSummaryField("string"); + Document summary = makeDocument(); + checkString("Foo Bar Baz", SFC::convertSummaryField(false, + *summary.getValue("string"), + false).get()); +} + +void Test::requireThatAttributeIsAnUnmodifiedString() { + setAttributeField("string"); + Document attribute = makeDocument(); + checkString("Foo Bar Baz", + attribute.getValue("string").get()); +} + +void Test::requireThatArrayIsFlattenedInSummaryField() { + setSummaryField("string_array"); + Document summary = makeDocument(); + FieldBlock expect("[\"\\\"foO\\\"\",\"ba\\\\R\"]"); + checkString(expect.json, + SFC::convertSummaryField(false, + *summary.getValue("string_array"), + false).get()); + checkData(expect.binary, + SFC::convertSummaryField(false, + *summary.getValue("string_array"), + true).get()); +} + +void Test::requireThatWeightedSetIsFlattenedInSummaryField() { + setSummaryField("string_wset"); + Document summary = makeDocument(); + FieldBlock expect("[{\"item\":\"\\\"foo\\\"\",\"weight\":2},{\"item\":\"ba\\\\r\",\"weight\":4}]"); + checkString(expect.json, + SFC::convertSummaryField(false, + *summary.getValue("string_wset"), + false).get()); + checkData(expect.binary, + SFC::convertSummaryField(false, + *summary.getValue("string_wset"), + true).get()); +} + +void Test::requireThatPositionsAreTransformedInSummary() { + setSummaryField("position1"); + setSummaryField("position2"); + Document summary = makeDocument(); + FieldValue::UP fv = summary.getValue("position1"); + EXPECT_EQUAL(5, cvtSummaryAs<IntFieldValue>(false, fv).getValue()); + FieldValue::UP fv2 = summary.getValue("position2"); + EXPECT_EQUAL(24, cvtSummaryAs<LongFieldValue>(false, fv2).getValue()); +} + +void Test::requireThatArrayIsPreservedInAttributeField() { + setAttributeField("string_array"); + Document attribute = makeDocument(); + const char *array[] = { "\"foO\"", "ba\\R" }; + checkArray(array, + attribute.getValue("string_array").get()); +} + +void Test::requireThatPositionsAreTransformedInAttributeField() { + setAttributeField("position1"); + setAttributeField("position2"); + Document attr = makeDocument(); + FieldValue::UP fv = attr.getValue("position1"); + EXPECT_EQUAL(5, cvtAttributeAs<IntFieldValue>(fv).getValue()); + fv = attr.getValue("position2"); + EXPECT_EQUAL(24, cvtAttributeAs<LongFieldValue>(fv).getValue()); +} + +void Test::requireThatPositionArrayIsTransformedInAttributeField() { + setAttributeField("position2_array"); + Document attr = makeDocument(); + FieldValue::UP fv = attr.getValue("position2_array"); + ArrayFieldValue a = cvtAttributeAs<ArrayFieldValue>(fv); + EXPECT_EQUAL(2u, a.size()); + EXPECT_EQUAL(24, dynamic_cast<LongFieldValue &>(a[0]).getValue()); + EXPECT_EQUAL(48, dynamic_cast<LongFieldValue &>(a[1]).getValue()); +} + +void Test::requireThatPositionWeightedSetIsTransformedInAttributeField() { + setAttributeField("position2_wset"); + Document attr = makeDocument(); + FieldValue::UP fv = attr.getValue("position2_wset"); + WeightedSetFieldValue w = cvtAttributeAs<WeightedSetFieldValue>(fv); + EXPECT_EQUAL(2u, w.size()); + WeightedSetFieldValue::iterator it = w.begin(); + EXPECT_EQUAL(24, dynamic_cast<const LongFieldValue&>(*it->first).getValue()); + EXPECT_EQUAL(4, dynamic_cast<IntFieldValue &>(*it->second).getValue()); + ++it; + EXPECT_EQUAL(48, dynamic_cast<const LongFieldValue&>(*it->first).getValue()); + EXPECT_EQUAL(2, dynamic_cast<IntFieldValue &>(*it->second).getValue()); +} + +void Test::requireThatAttributeCanBePrimitiveTypes() { + setAttributeField("int"); + setAttributeField("long"); + setAttributeField("short"); + setAttributeField("byte"); + setAttributeField("double"); + setAttributeField("float"); + Document attribute = makeDocument(); + FieldValue::UP fv = attribute.getValue("int"); + EXPECT_EQUAL(42, cvtAttributeAs<IntFieldValue>(fv).getValue()); + fv = attribute.getValue("long"); + EXPECT_EQUAL(84, cvtAttributeAs<LongFieldValue>(fv).getValue()); + fv = attribute.getValue("short"); + EXPECT_EQUAL(21, cvtAttributeAs<ShortFieldValue>(fv).getValue()); + fv = attribute.getValue("byte"); + EXPECT_EQUAL(11, cvtAttributeAs<ByteFieldValue>(fv).getValue()); + fv = attribute.getValue("double"); + EXPECT_EQUAL(0.4, cvtAttributeAs<DoubleFieldValue>(fv).getValue()); + fv = attribute.getValue("float"); + EXPECT_EQUAL(0.2f, cvtAttributeAs<FloatFieldValue>(fv).getValue()); +} + +void Test::requireThatSummaryCanBePrimitiveTypes() { + setSummaryField("int"); + setSummaryField("long"); + setSummaryField("short"); + setSummaryField("byte"); + setSummaryField("double"); + setSummaryField("float"); + Document summary = makeDocument(); + FieldValue::UP fv = summary.getValue("int"); + EXPECT_EQUAL(42, cvtSummaryAs<IntFieldValue>(false, fv).getValue()); + fv = summary.getValue("long"); + EXPECT_EQUAL(84, cvtSummaryAs<LongFieldValue>(false, fv).getValue()); + fv = summary.getValue("short"); + EXPECT_EQUAL(21, cvtSummaryAs<ShortFieldValue>(false, fv).getValue()); + fv = summary.getValue("byte"); + EXPECT_EQUAL(11, cvtSummaryAs<ShortFieldValue>(false, fv).getValue()); + fv = summary.getValue("double"); + EXPECT_EQUAL(0.4, cvtSummaryAs<DoubleFieldValue>(false, fv).getValue()); + fv = summary.getValue("float"); + EXPECT_EQUAL(0.2f, cvtSummaryAs<FloatFieldValue>(false, fv).getValue()); +} + +void Test::requireThatSummaryHandlesCjk() { + Document summary = makeDocument(); + FieldValue::UP fv = summary.getValue("chinese"); + EXPECT_EQUAL("我就是那个\037大灰狼\037", + cvtSummaryAs<StringFieldValue>(true, fv).getValue()); +} + +void Test::requireThatSearchDataTypeUsesDefaultDataTypes() { + const StructDataType *uri = + dynamic_cast<const StructDataType *>(SearchDataType::URI); + ASSERT_TRUE(uri); + ASSERT_TRUE(uri->hasField("all")); + ASSERT_TRUE(uri->hasField("scheme")); + ASSERT_TRUE(uri->hasField("host")); + ASSERT_TRUE(uri->hasField("port")); + ASSERT_TRUE(uri->hasField("path")); + ASSERT_TRUE(uri->hasField("query")); + ASSERT_TRUE(uri->hasField("fragment")); + EXPECT_EQUAL(*DataType::STRING, uri->getField("all").getDataType()); + EXPECT_EQUAL(*DataType::STRING, uri->getField("scheme").getDataType()); + EXPECT_EQUAL(*DataType::STRING, uri->getField("host").getDataType()); + EXPECT_EQUAL(*DataType::STRING, uri->getField("port").getDataType()); + EXPECT_EQUAL(*DataType::STRING, uri->getField("path").getDataType()); + EXPECT_EQUAL(*DataType::STRING, uri->getField("query").getDataType()); + EXPECT_EQUAL(*DataType::STRING, uri->getField("fragment").getDataType()); +} + +void Test::requireThatLinguisticsAnnotationUsesDefaultDataTypes() { + EXPECT_EQUAL(*AnnotationType::TERM, *linguistics::TERM); + ASSERT_TRUE(AnnotationType::TERM->getDataType()); + ASSERT_TRUE(linguistics::TERM->getDataType()); + EXPECT_EQUAL(*AnnotationType::TERM->getDataType(), + *linguistics::TERM->getDataType()); +} + +void +Test::requireThatPredicateIsPrinted() +{ + std::unique_ptr<Slime> input(new Slime()); + Cursor &obj = input->setObject(); + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_SET); + obj.setString(Predicate::KEY, "foo"); + Cursor &arr = obj.setArray(Predicate::SET); + arr.addString("bar"); + + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + doc.setValue("predicate", PredicateFieldValue(std::move(input))); + + checkString("'foo' in ['bar']\n", + SFC::convertSummaryField(false, *doc.getValue("predicate"), false).get()); +} + + +Tensor::UP +createTensor(const TensorCells &cells, const TensorDimensions &dimensions) { + vespalib::tensor::DefaultTensor::builder builder; + return vespalib::tensor::TensorFactory::create(cells, dimensions, builder); +} + +void +Test::requireThatTensorIsPrinted() +{ + TensorFieldValue tensorFieldValue; + tensorFieldValue = createTensor({ {{{"x", "4"}, {"y", "5"}}, 7} }, + {"x", "y"}); + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + doc.setValue("tensor", tensorFieldValue); + + FieldBlock expect1("{ dimensions: [ 'x', 'y' ], cells: [" + "{ address: { x:'4', y:'5' }, value: 7.0 }" + "] }"); + + TEST_CALL(checkString(expect1.json, + SFC::convertSummaryField(false, + *doc.getValue("tensor"), + false).get())); + TEST_CALL(checkData(expect1.binary, + SFC::convertSummaryField(false, + *doc.getValue("tensor"), + true).get())); + doc.setValue("tensor", TensorFieldValue()); + + FieldBlock expect2("{ }"); + + TEST_CALL(checkString(expect2.json, + SFC::convertSummaryField(false, + *doc.getValue("tensor"), + false).get())); + TEST_CALL(checkData(expect2.binary, + SFC::convertSummaryField(false, + *doc.getValue("tensor"), + true).get())); +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchcore/src/tests/proton/docsummary/summarymap.cfg b/searchcore/src/tests/proton/docsummary/summarymap.cfg new file mode 100644 index 00000000000..f2d429b1412 --- /dev/null +++ b/searchcore/src/tests/proton/docsummary/summarymap.cfg @@ -0,0 +1,48 @@ +override[16] +override[0].field "aa" +override[0].command "copy" +override[0].arguments "ab" +override[1].field "ab" +override[1].command "empty" +override[2].field "ba" +override[2].command "attribute" +override[2].arguments "ba" +override[3].field "bb" +override[3].command "attribute" +override[3].arguments "bb" +override[4].field "bc" +override[4].command "attribute" +override[4].arguments "bc" +override[5].field "bd" +override[5].command "attribute" +override[5].arguments "bd" +override[6].field "be" +override[6].command "attribute" +override[6].arguments "be" +override[7].field "bf" +override[7].command "attribute" +override[7].arguments "bf" +override[8].field "bg" +override[8].command "attribute" +override[8].arguments "bg" +override[9].field "bh" +override[9].command "attribute" +override[9].arguments "bh" +override[10].field "bi" +override[10].command "attribute" +override[10].arguments "bi" +override[11].field "dynamicstring" +override[11].command "dynamicteaser" +override[11].arguments "dynamicstring" +override[12].field "sp2x" +override[12].command "positions" +override[12].arguments "sp2" +override[13].field "ap2x" +override[13].command "positions" +override[13].arguments "ap2" +override[14].field "wp2x" +override[14].command "positions" +override[14].arguments "wp2" +override[15].field "bj" +override[15].command "attribute" +override[15].arguments "bj" |