aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-10-12 17:10:37 +0200
committer GitHub <noreply@github.com> 2022-10-12 17:10:37 +0200
commit c2fff990ef096c124b235ce34e3975ccccdbe8d6 (patch)
tree 194d83650f4cdd245032351abd1967b3985e3a4a
parent f329a9d5e0a323b0485dcae52d90987b675808bc (diff)
parent e1e90137560795397e77203b4e1a75cd3c61396f (diff)
Merge pull request #24404 from vespa-engine/toregge/remove-docbuilder-and-add-stringfieldbuilder
Remove search::index::DocBuilder. Add search::index::StringFieldBuilder.
-rw-r--r--searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp1
-rw-r--r--searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp64
-rw-r--r--searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp6
-rw-r--r--searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h10
-rw-r--r--searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp8
-rw-r--r--searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp9
-rw-r--r--searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp10
-rw-r--r--searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp20
-rw-r--r--searchcore/src/tests/proton/index/fusionrunner_test.cpp24
-rw-r--r--searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp17
-rw-r--r--searchcore/src/tests/proton/index/indexmanager_test.cpp23
-rw-r--r--searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp8
-rw-r--r--searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h9
-rw-r--r--searchlib/CMakeLists.txt3
-rw-r--r--searchlib/src/tests/diskindex/fusion/fusion_test.cpp84
-rw-r--r--searchlib/src/tests/index/docbuilder/.gitignore5
-rw-r--r--searchlib/src/tests/index/docbuilder/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/index/docbuilder/docbuilder_test.cpp437
-rw-r--r--searchlib/src/tests/index/doctypebuilder/.gitignore5
-rw-r--r--searchlib/src/tests/index/doctypebuilder/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp74
-rw-r--r--searchlib/src/tests/index/string_field_builder/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp141
-rw-r--r--searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp92
-rw-r--r--searchlib/src/tests/memoryindex/field_index/field_index_test.cpp357
-rw-r--r--searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp135
-rw-r--r--searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp38
-rw-r--r--searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp243
-rw-r--r--searchlib/src/vespa/searchlib/index/CMakeLists.txt3
-rw-r--r--searchlib/src/vespa/searchlib/index/docbuilder.cpp814
-rw-r--r--searchlib/src/vespa/searchlib/index/docbuilder.h282
-rw-r--r--searchlib/src/vespa/searchlib/index/doctypebuilder.cpp175
-rw-r--r--searchlib/src/vespa/searchlib/index/doctypebuilder.h28
-rw-r--r--searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/index/empty_doc_builder.h1
-rw-r--r--searchlib/src/vespa/searchlib/index/string_field_builder.cpp140
-rw-r--r--searchlib/src/vespa/searchlib/index/string_field_builder.h45
38 files changed, 920 insertions, 2426 deletions
diff --git a/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp b/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp
index c66b2dd15dc..19b8348fb7a 100644
--- a/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp
+++ b/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchcore/proton/attribute/attribute_populator.h>
+#include <vespa/document/repo/documenttyperepo.h>
#include <vespa/searchcore/proton/attribute/attributemanager.h>
#include <vespa/searchcore/proton/common/hw_info.h>
#include <vespa/searchcore/proton/test/test.h>
diff --git a/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp b/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp
index e89d5eef078..4fc38992368 100644
--- a/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp
@@ -3,7 +3,11 @@
#include <vespa/persistence/spi/result.h>
#include <vespa/document/datatype/tensor_data_type.h>
#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/tensorfieldvalue.h>
#include <vespa/document/update/assignvalueupdate.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/document/repo/documenttyperepo.h>
#include <vespa/document/update/documentupdate.h>
#include <vespa/document/update/clearvalueupdate.h>
@@ -29,7 +33,7 @@
#include <vespa/searchcore/proton/server/ireplayconfig.h>
#include <vespa/searchcore/proton/test/dummy_feed_view.h>
#include <vespa/searchcore/proton/test/transport_helper.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/transactionlog/translogserver.h>
#include <vespa/vespalib/testkit/testapp.h>
@@ -271,20 +275,33 @@ MyFeedView::~MyFeedView() = default;
struct SchemaContext {
- Schema::SP schema;
- std::unique_ptr<DocBuilder> builder;
+ Schema::SP schema;
+ EmptyDocBuilder builder;
SchemaContext();
+ SchemaContext(bool has_i2);
~SchemaContext();
DocTypeName getDocType() const {
- return DocTypeName(builder->getDocumentType().getName());
+ return DocTypeName(builder.get_document_type().getName());
}
- const std::shared_ptr<const document::DocumentTypeRepo> &getRepo() const { return builder->getDocumentTypeRepo(); }
+ std::shared_ptr<const document::DocumentTypeRepo> getRepo() const { return builder.get_repo_sp(); }
void addField(vespalib::stringref fieldName);
};
SchemaContext::SchemaContext()
+ : SchemaContext(false)
+{
+}
+
+SchemaContext::SchemaContext(bool has_i2)
: schema(std::make_shared<Schema>()),
- builder()
+ builder([has_i2](auto& header) {
+ header.addTensorField("tensor", "tensor(x{},y{})")
+ .addTensorField("tensor2", "tensor(x{},y{})")
+ .addField("i1", document::DataType::T_STRING);
+ if (has_i2) {
+ header.addField("i2", document::DataType::T_STRING);
+ }
+ })
{
schema->addAttributeField(Schema::AttributeField("tensor", DataType::TENSOR, CollectionType::SINGLE, "tensor(x{},y{})"));
schema->addAttributeField(Schema::AttributeField("tensor2", DataType::TENSOR, CollectionType::SINGLE, "tensor(x{},y{})"));
@@ -298,14 +315,13 @@ void
SchemaContext::addField(vespalib::stringref fieldName)
{
schema->addIndexField(Schema::IndexField(fieldName, DataType::STRING, CollectionType::SINGLE));
- builder = std::make_unique<DocBuilder>(*schema);
}
struct DocumentContext {
Document::SP doc;
BucketId bucketId;
- DocumentContext(const vespalib::string &docId, DocBuilder &builder) :
- doc(builder.startDocument(docId).endDocument().release()),
+ DocumentContext(const vespalib::string &docId, EmptyDocBuilder &builder) :
+ doc(builder.make_document(docId)),
bucketId(BucketFactory::getBucketId(doc->getId()))
{
}
@@ -313,7 +329,7 @@ struct DocumentContext {
struct TwoFieldsSchemaContext : public SchemaContext {
TwoFieldsSchemaContext()
- : SchemaContext()
+ : SchemaContext(true)
{
addField("i2");
}
@@ -324,8 +340,8 @@ TensorDataType tensor1DType(ValueType::from_spec("tensor(x{})"));
struct UpdateContext {
DocumentUpdate::SP update;
BucketId bucketId;
- UpdateContext(const vespalib::string &docId, DocBuilder &builder) :
- update(std::make_shared<DocumentUpdate>(*builder.getDocumentTypeRepo(), builder.getDocumentType(), DocumentId(docId))),
+ UpdateContext(const vespalib::string &docId, EmptyDocBuilder &builder) :
+ update(std::make_shared<DocumentUpdate>(builder.get_repo(), builder.get_document_type(), DocumentId(docId))),
bucketId(BucketFactory::getBucketId(update->getId()))
{
}
@@ -464,7 +480,7 @@ TEST_F("require that heartBeat calls FeedView's heartBeat",
TEST_F("require that outdated remove is ignored", FeedHandlerFixture)
{
- DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder);
+ DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder);
auto op = std::make_unique<RemoveOperationWithDocId>(doc_context.bucketId, Timestamp(10), doc_context.doc->getId());
static_cast<DocumentOperation &>(*op).setPrevDbDocumentId(DbDocumentId(4));
static_cast<DocumentOperation &>(*op).setPrevTimestamp(Timestamp(10000));
@@ -476,7 +492,7 @@ TEST_F("require that outdated remove is ignored", FeedHandlerFixture)
TEST_F("require that outdated put is ignored", FeedHandlerFixture)
{
- DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder);
+ DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder);
auto op =std::make_unique<PutOperation>(doc_context.bucketId, Timestamp(10), std::move(doc_context.doc));
static_cast<DocumentOperation &>(*op).setPrevTimestamp(Timestamp(10000));
FeedTokenContext token_context;
@@ -496,7 +512,7 @@ addLidToRemove(RemoveDocumentsOperation &op)
TEST_F("require that handleMove calls FeedView", FeedHandlerFixture)
{
- DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder);
+ DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder);
MoveOperation op(doc_context.bucketId, Timestamp(2), doc_context.doc, DbDocumentId(0, 2), 1);
op.setDbDocumentId(DbDocumentId(1, 2));
f.runAsMaster([&]() { f.handler.handleMove(op, IDestructorCallback::SP()); });
@@ -556,7 +572,7 @@ TEST_F("require that flush cannot unprune", FeedHandlerFixture)
TEST_F("require that remove of unknown document with known data type stores remove", FeedHandlerFixture)
{
- DocumentContext doc_context("id:test:searchdocument::foo", *f.schema.builder);
+ DocumentContext doc_context("id:test:searchdocument::foo", f.schema.builder);
auto op = std::make_unique<RemoveOperationWithDocId>(doc_context.bucketId, Timestamp(10), doc_context.doc->getId());
FeedTokenContext token_context;
f.handler.performOperation(std::move(token_context.token), std::move(op));
@@ -566,7 +582,7 @@ TEST_F("require that remove of unknown document with known data type stores remo
TEST_F("require that partial update for non-existing document is tagged as such", FeedHandlerFixture)
{
- UpdateContext upCtx("id:test:searchdocument::foo", *f.schema.builder);
+ UpdateContext upCtx("id:test:searchdocument::foo", f.schema.builder);
auto op = std::make_unique<UpdateOperation>(upCtx.bucketId, Timestamp(10), upCtx.update);
FeedTokenContext token_context;
f.handler.performOperation(std::move(token_context.token), std::move(op));
@@ -582,7 +598,7 @@ TEST_F("require that partial update for non-existing document is tagged as such"
TEST_F("require that partial update for non-existing document is created if specified", FeedHandlerFixture)
{
f.handler.setSerialNum(15);
- UpdateContext upCtx("id:test:searchdocument::foo", *f.schema.builder);
+ UpdateContext upCtx("id:test:searchdocument::foo", f.schema.builder);
upCtx.update->setCreateIfNonExistent(true);
f.feedView.metaStore.insert(upCtx.update->getId().getGlobalId(), MyDocumentMetaStore::Entry(5, 5, Timestamp(10)));
auto op = std::make_unique<UpdateOperation>(upCtx.bucketId, Timestamp(10), upCtx.update);
@@ -605,7 +621,7 @@ TEST_F("require that put is rejected if resource limit is reached", FeedHandlerF
f.writeFilter._acceptWriteOperation = false;
f.writeFilter._message = "Attribute resource limit reached";
- DocumentContext docCtx("id:test:searchdocument::foo", *f.schema.builder);
+ DocumentContext docCtx("id:test:searchdocument::foo", f.schema.builder);
auto op = std::make_unique<PutOperation>(docCtx.bucketId, Timestamp(10), std::move(docCtx.doc));
FeedTokenContext token;
f.handler.performOperation(std::move(token.token), std::move(op));
@@ -620,7 +636,7 @@ TEST_F("require that update is rejected if resource limit is reached", FeedHandl
f.writeFilter._acceptWriteOperation = false;
f.writeFilter._message = "Attribute resource limit reached";
- UpdateContext updCtx("id:test:searchdocument::foo", *f.schema.builder);
+ UpdateContext updCtx("id:test:searchdocument::foo", f.schema.builder);
updCtx.addFieldUpdate("tensor");
auto op = std::make_unique<UpdateOperation>(updCtx.bucketId, Timestamp(10), updCtx.update);
FeedTokenContext token;
@@ -637,7 +653,7 @@ TEST_F("require that remove is NOT rejected if resource limit is reached", FeedH
f.writeFilter._acceptWriteOperation = false;
f.writeFilter._message = "Attribute resource limit reached";
- DocumentContext docCtx("id:test:searchdocument::foo", *f.schema.builder);
+ DocumentContext docCtx("id:test:searchdocument::foo", f.schema.builder);
auto op = std::make_unique<RemoveOperationWithDocId>(docCtx.bucketId, Timestamp(10), docCtx.doc->getId());
FeedTokenContext token;
f.handler.performOperation(std::move(token.token), std::move(op));
@@ -651,7 +667,7 @@ checkUpdate(FeedHandlerFixture &f, SchemaContext &schemaContext,
const vespalib::string &fieldName, bool expectReject, bool existing)
{
f.handler.setSerialNum(15);
- UpdateContext updCtx("id:test:searchdocument::foo", *schemaContext.builder);
+ UpdateContext updCtx("id:test:searchdocument::foo", schemaContext.builder);
updCtx.addFieldUpdate(fieldName);
if (existing) {
f.feedView.metaStore.insert(updCtx.update->getId().getGlobalId(), MyDocumentMetaStore::Entry(5, 5, Timestamp(9)));
@@ -733,7 +749,7 @@ TEST_F("require that tensor update with wrong tensor type fails", FeedHandlerFix
TEST_F("require that put with different document type repo is ok", FeedHandlerFixture)
{
TwoFieldsSchemaContext schema;
- DocumentContext doc_context("id:ns:searchdocument::foo", *schema.builder);
+ DocumentContext doc_context("id:ns:searchdocument::foo", schema.builder);
auto op = std::make_unique<PutOperation>(doc_context.bucketId,
Timestamp(10), std::move(doc_context.doc));
FeedTokenContext token_context;
@@ -747,7 +763,7 @@ TEST_F("require that put with different document type repo is ok", FeedHandlerFi
TEST_F("require that feed stats are updated", FeedHandlerFixture)
{
- DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder);
+ DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder);
auto op =std::make_unique<PutOperation>(doc_context.bucketId, Timestamp(10), std::move(doc_context.doc));
FeedTokenContext token_context;
f.handler.performOperation(std::move(token_context.token), std::move(op));
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp
index 9c68d7d5974..b3a2e9cad83 100644
--- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp
+++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp
@@ -127,7 +127,8 @@ MyHandler::handleCompactLidSpace(const CompactLidSpaceOperation &op, std::shared
}
MyHandler::MyHandler(bool storeMoveDoneContexts, bool bucketIdEqualLid)
- : _stats(),
+ : _builder(),
+ _stats(),
_moveFromLid(0),
_moveToLid(0),
_handleMoveCnt(0),
@@ -140,9 +141,8 @@ MyHandler::MyHandler(bool storeMoveDoneContexts, bool bucketIdEqualLid)
_rm_listener(),
_docs()
{
- DocBuilder builder = DocBuilder(Schema());
for (uint32_t i(0); i < 10; i++) {
- auto doc = builder.startDocument(fmt("%s%d", DOC_ID.c_str(), i)).endDocument();
+ auto doc = _builder.make_document(fmt("%s%d", DOC_ID.c_str(), i));
_docs.emplace_back(DocumentMetaData(i, TIMESTAMP_1, createBucketId(i), doc->getId().getGlobalId()), std::move(doc));
}
}
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h
index b404fc6956a..806729a108c 100644
--- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h
+++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h
@@ -17,11 +17,14 @@
#include <vespa/searchcore/proton/test/test.h>
#include <vespa/searchcore/proton/test/dummy_document_store.h>
#include <vespa/vespalib/util/idestructorcallback.h>
-#include <vespa/searchlib/index/docbuilder.h>
-using namespace document;
+using document::BucketId;
+using document::GlobalId;
+using document::Document;
+using document::DocumentId;
+using document::DocumentTypeRepo;
using namespace proton;
-using namespace search::index;
+using search::index::EmptyDocBuilder;
using namespace search;
using namespace vespalib;
using vespalib::IDestructorCallback;
@@ -60,6 +63,7 @@ struct MyScanIterator : public IDocumentScanIterator {
};
struct MyHandler : public ILidSpaceCompactionHandler {
+ EmptyDocBuilder _builder;
std::vector<LidUsageStats> _stats;
std::vector<LidVector> _lids;
mutable uint32_t _moveFromLid;
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp
index bc9cd9a93fa..fd38853dca1 100644
--- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp
@@ -5,7 +5,7 @@
#include <vespa/vespalib/gtest/gtest.h>
struct HandlerTest : public ::testing::Test {
- DocBuilder _docBuilder;
+ EmptyDocBuilder _docBuilder;
std::shared_ptr<bucketdb::BucketDBOwner> _bucketDB;
MyDocumentStore _docStore;
MySubDb _subDb;
@@ -15,13 +15,13 @@ struct HandlerTest : public ::testing::Test {
};
HandlerTest::HandlerTest()
- : _docBuilder(Schema()),
+ : _docBuilder(),
_bucketDB(std::make_shared<bucketdb::BucketDBOwner>()),
_docStore(),
- _subDb(_bucketDB, _docStore, _docBuilder.getDocumentTypeRepo()),
+ _subDb(_bucketDB, _docStore, _docBuilder.get_repo_sp()),
_handler(_subDb.maintenance_sub_db, "test")
{
- _docStore._readDoc = _docBuilder.startDocument(DOC_ID).endDocument();
+ _docStore._readDoc = _docBuilder.make_document(DOC_ID);
}
HandlerTest::~HandlerTest() = default;
diff --git a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp
index ea4d556c502..915402122b8 100644
--- a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp
@@ -35,7 +35,6 @@
#include <vespa/searchcore/proton/test/test.h>
#include <vespa/searchcore/proton/test/transport_helper.h>
#include <vespa/searchlib/common/idocumentmetastore.h>
-#include <vespa/searchlib/index/docbuilder.h>
#include <vespa/vespalib/data/slime/slime.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/destructor_callbacks.h>
@@ -99,11 +98,11 @@ class MyDocumentSubDB
uint32_t _subDBId;
DocumentMetaStore::SP _metaStoreSP;
DocumentMetaStore & _metaStore;
- const std::shared_ptr<const document::DocumentTypeRepo> &_repo;
+ std::shared_ptr<const document::DocumentTypeRepo> _repo;
const DocTypeName &_docTypeName;
public:
- MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, const std::shared_ptr<const document::DocumentTypeRepo> &repo,
+ MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, std::shared_ptr<const document::DocumentTypeRepo> repo,
std::shared_ptr<bucketdb::BucketDBOwner> bucketDB, const DocTypeName &docTypeName);
~MyDocumentSubDB();
@@ -136,7 +135,7 @@ public:
const IDocumentMetaStore &getMetaStore() const { return _metaStore; }
};
-MyDocumentSubDB::MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, const std::shared_ptr<const document::DocumentTypeRepo> &repo,
+MyDocumentSubDB::MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, std::shared_ptr<const document::DocumentTypeRepo> repo,
std::shared_ptr<bucketdb::BucketDBOwner> bucketDB, const DocTypeName &docTypeName)
: _docs(),
_subDBId(subDBId),
@@ -144,7 +143,7 @@ MyDocumentSubDB::MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, const st
std::move(bucketDB), DocumentMetaStore::getFixedName(), search::GrowStrategy(),
subDbType)),
_metaStore(*_metaStoreSP),
- _repo(repo),
+ _repo(std::move(repo)),
_docTypeName(docTypeName)
{
_metaStore.constructFreeList();
diff --git a/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp b/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp
index 00694b6b78f..67342df5613 100644
--- a/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp
@@ -2,6 +2,7 @@
#include <vespa/document/base/documentid.h>
#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/fieldvalue/document.h>
#include <vespa/searchcommon/common/schema.h>
#include <vespa/searchcore/proton/server/putdonecontext.h>
#include <vespa/searchcore/proton/server/removedonecontext.h>
@@ -13,7 +14,7 @@
#include <vespa/searchcore/proton/test/mock_summary_adapter.h>
#include <vespa/searchcore/proton/test/transport_helper.h>
#include <vespa/searchcore/proton/test/thread_utils.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/vespalib/util/destructor_callbacks.h>
#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/testkit/testapp.h>
@@ -32,7 +33,7 @@ using namespace proton;
using search::DocumentIdT;
using vespalib::IDestructorCallback;
using search::SerialNum;
-using search::index::DocBuilder;
+using search::index::EmptyDocBuilder;
using search::index::Schema;
using storage::spi::Timestamp;
using vespalib::make_string;
@@ -59,9 +60,8 @@ public:
};
std::shared_ptr<const DocumentTypeRepo> myGetDocumentTypeRepo() {
- Schema schema;
- DocBuilder builder(schema);
- std::shared_ptr<const DocumentTypeRepo> repo = builder.getDocumentTypeRepo();
+ EmptyDocBuilder builder;
+ std::shared_ptr<const DocumentTypeRepo> repo = builder.get_repo_sp();
ASSERT_TRUE(repo.get());
return repo;
}
diff --git a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp
index ac540ad2e2d..49f13d8c5b5 100644
--- a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp
+++ b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp
@@ -3,6 +3,8 @@
#include <vespa/document/datatype/datatype.h>
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/fieldvalue/fieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/common/documentsummary.h>
#include <vespa/vespalib/util/sequencedtaskexecutor.h>
#include <vespa/searchlib/common/flush_token.h>
@@ -10,8 +12,9 @@
#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/fef/fef.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/memory_index.h>
#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
#include <vespa/searchlib/query/base.h>
@@ -31,6 +34,7 @@ LOG_SETUP("feed_and_search_test");
using document::DataType;
using document::Document;
using document::FieldValue;
+using document::StringFieldValue;
using search::DocumentIdT;
using search::FlushToken;
using search::TuneFileIndexing;
@@ -44,9 +48,10 @@ using search::fef::MatchData;
using search::fef::MatchDataLayout;
using search::fef::TermFieldHandle;
using search::fef::TermFieldMatchData;
-using search::index::DocBuilder;
+using search::index::EmptyDocBuilder;
using search::index::DummyFileHeaderContext;
using search::index::Schema;
+using search::index::StringFieldBuilder;
using search::index::test::MockFieldLengthInspector;
using search::memoryindex::MemoryIndex;
using search::query::SimpleStringTerm;
@@ -113,14 +118,13 @@ Schema getSchema() {
return schema;
}
-Document::UP buildDocument(DocBuilder & doc_builder, int id,
+Document::UP buildDocument(EmptyDocBuilder & doc_builder, int id,
const string &word) {
ostringstream ost;
ost << "id:ns:searchdocument::" << id;
- doc_builder.startDocument(ost.str());
- doc_builder.startIndexField(field_name)
- .addStr(noise).addStr(word).endField();
- return doc_builder.endDocument();
+ auto doc = doc_builder.make_document(ost.str());
+ doc->setValue(field_name, StringFieldBuilder(doc_builder).word(noise).space().word(word).build());
+ return doc;
}
// Performs a search using a Searchable.
@@ -165,7 +169,7 @@ void Test::requireThatMemoryIndexCanBeDumpedAndSearched() {
auto indexFieldInverter = vespalib::SequencedTaskExecutor::create(invert_executor, 2);
auto indexFieldWriter = vespalib::SequencedTaskExecutor::create(write_executor, 2);
MemoryIndex memory_index(schema, MockFieldLengthInspector(), *indexFieldInverter, *indexFieldWriter);
- DocBuilder doc_builder(schema);
+ EmptyDocBuilder doc_builder([](auto& header) { header.addField(field_name, DataType::T_STRING); });
Document::UP doc = buildDocument(doc_builder, doc_id1, word1);
memory_index.insertDocument(doc_id1, *doc, {});
diff --git a/searchcore/src/tests/proton/index/fusionrunner_test.cpp b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
index 850f8a8f0d1..166d34f366b 100644
--- a/searchcore/src/tests/proton/index/fusionrunner_test.cpp
+++ b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
@@ -1,15 +1,19 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchcorespi/index/fusionrunner.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchcore/proton/index/indexmanager.h>
#include <vespa/searchcore/proton/test/transport_helper.h>
-#include <vespa/searchcorespi/index/fusionrunner.h>
#include <vespa/vespalib/util/isequencedtaskexecutor.h>
#include <vespa/searchlib/common/flush_token.h>
#include <vespa/searchlib/diskindex/diskindex.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/memory_index.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
@@ -25,6 +29,7 @@
using document::Document;
using document::FieldValue;
+using document::StringFieldValue;
using proton::ExecutorThreadingService;
using proton::index::IndexManager;
using search::FixedSourceSelector;
@@ -38,9 +43,10 @@ using search::fef::MatchData;
using search::fef::MatchDataLayout;
using search::fef::TermFieldHandle;
using search::fef::TermFieldMatchData;
-using search::index::DocBuilder;
+using search::index::EmptyDocBuilder;
using search::index::DummyFileHeaderContext;
using search::index::Schema;
+using search::index::StringFieldBuilder;
using search::index::schema::DataType;
using search::index::test::MockFieldLengthInspector;
using search::memoryindex::MemoryIndex;
@@ -149,15 +155,15 @@ void Test::tearDown() {
_selector.reset(0);
}
-Document::UP buildDocument(DocBuilder & doc_builder, int id, const string &word) {
+Document::UP buildDocument(EmptyDocBuilder & doc_builder, int id, const string &word) {
vespalib::asciistream ost;
ost << "id:ns:searchdocument::" << id;
- doc_builder.startDocument(ost.str());
- doc_builder.startIndexField(field_name).addStr(word).endField();
- return doc_builder.endDocument();
+ auto doc = doc_builder.make_document(ost.str());
+ doc->setValue(field_name, StringFieldBuilder(doc_builder).word(word).build());
+ return doc;
}
-void addDocument(DocBuilder & doc_builder, MemoryIndex &index, ISourceSelector &selector,
+void addDocument(EmptyDocBuilder & doc_builder, MemoryIndex &index, ISourceSelector &selector,
uint8_t index_id, uint32_t docid, const string &word) {
Document::UP doc = buildDocument(doc_builder, docid, word);
index.insertDocument(docid, *doc, {});
@@ -181,7 +187,7 @@ void Test::createIndex(const string &dir, uint32_t id, bool fusion) {
_selector->setDefaultSource(id - _selector->getBaseId());
Schema schema = getSchema();
- DocBuilder doc_builder(schema);
+ EmptyDocBuilder doc_builder([](auto& header) { header.addField(field_name, document::DataType::T_STRING); });
MemoryIndex memory_index(schema, MockFieldLengthInspector(),
_service.write().indexFieldInverter(),
_service.write().indexFieldWriter());
diff --git a/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp b/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp
index 75e6b01b46f..7202d7f0abe 100644
--- a/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp
+++ b/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp
@@ -1,10 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
-
#include <vespa/searchcore/proton/index/index_writer.h>
+#include <vespa/document/fieldvalue/document.h>
#include <vespa/searchcore/proton/test/mock_index_manager.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
#include <vespa/log/log.h>
LOG_SETUP("index_writer_test");
@@ -80,21 +82,18 @@ struct Fixture
IIndexManager::SP iim;
MyIndexManager &mim;
IndexWriter iw;
- Schema schema;
- DocBuilder builder;
+ EmptyDocBuilder builder;
Document::UP dummyDoc;
Fixture()
: iim(new MyIndexManager()),
mim(static_cast<MyIndexManager &>(*iim)),
iw(iim),
- schema(),
- builder(schema),
+ builder(),
dummyDoc(createDoc(1234)) // This content of this is not used
{
}
Document::UP createDoc(uint32_t lid) {
- builder.startDocument(vespalib::make_string("id:ns:searchdocument::%u", lid));
- return builder.endDocument();
+ return builder.make_document(vespalib::make_string("id:ns:searchdocument::%u", lid));
}
void put(SerialNum serialNum, const search::DocumentIdT lid) {
iw.put(serialNum, *dummyDoc, lid, {});
diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp
index b427daa4ad1..886978f7465 100644
--- a/searchcore/src/tests/proton/index/indexmanager_test.cpp
+++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp
@@ -1,6 +1,10 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchcore/proton/index/indexmanager.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/document/fieldvalue/document.h>
#include <vespa/searchcore/proton/test/transport_helper.h>
#include <vespa/searchcorespi/index/index_manager_stats.h>
#include <vespa/searchcorespi/index/indexcollection.h>
@@ -9,8 +13,9 @@
#include <vespa/vespalib/util/sequencedtaskexecutor.h>
#include <vespa/searchlib/common/flush_token.h>
#include <vespa/searchlib/common/serialnum.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/compact_words_store.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
#include <vespa/searchlib/memoryindex/document_inverter_context.h>
@@ -34,6 +39,7 @@ LOG_SETUP("indexmanager_test");
using document::Document;
using document::FieldValue;
+using document::StringFieldValue;
using proton::index::IndexConfig;
using proton::index::IndexManager;
using vespalib::SequencedTaskExecutor;
@@ -42,10 +48,11 @@ using search::TuneFileAttributes;
using search::TuneFileIndexManager;
using search::TuneFileIndexing;
using vespalib::datastore::EntryRef;
-using search::index::DocBuilder;
+using search::index::EmptyDocBuilder;
using search::index::DummyFileHeaderContext;
using search::index::FieldLengthInfo;
using search::index::Schema;
+using search::index::StringFieldBuilder;
using search::index::schema::DataType;
using search::index::test::MockFieldLengthInspector;
using search::memoryindex::CompactWordsStore;
@@ -88,13 +95,13 @@ void removeTestData() {
std::filesystem::remove_all(std::filesystem::path(index_dir));
}
-Document::UP buildDocument(DocBuilder &doc_builder, int id,
+Document::UP buildDocument(EmptyDocBuilder &doc_builder, int id,
const string &word) {
vespalib::asciistream ost;
ost << "id:ns:searchdocument::" << id;
- doc_builder.startDocument(ost.str());
- doc_builder.startIndexField(field_name).addStr(word).endField();
- return doc_builder.endDocument();
+ auto doc = doc_builder.make_document(ost.str());
+ doc->setValue(field_name, StringFieldBuilder(doc_builder).word(word).build());
+ return doc;
}
void push_documents_and_wait(search::memoryindex::DocumentInverter &inverter) {
@@ -110,7 +117,7 @@ struct IndexManagerTest : public ::testing::Test {
TransportAndExecutorService _service;
std::unique_ptr<IndexManager> _index_manager;
Schema _schema;
- DocBuilder _builder;
+ EmptyDocBuilder _builder;
IndexManagerTest()
: _serial_num(0),
@@ -119,7 +126,7 @@ struct IndexManagerTest : public ::testing::Test {
_service(1),
_index_manager(),
_schema(getSchema()),
- _builder(_schema)
+ _builder([](auto& header) { header.addField(field_name, document::DataType::T_STRING); })
{
removeTestData();
std::filesystem::create_directory(std::filesystem::path(index_dir));
diff --git a/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp b/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp
index da645f9a94b..719e762288e 100644
--- a/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp
+++ b/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp
@@ -3,7 +3,7 @@
LOG_SETUP("document_reprocessing_handler_test");
#include <vespa/searchcore/proton/reprocessing/document_reprocessing_handler.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/vespalib/testkit/testapp.h>
using namespace document;
@@ -32,17 +32,17 @@ const vespalib::string DOC_ID = "id:test:searchdocument::0";
struct FixtureBase
{
DocumentReprocessingHandler _handler;
- DocBuilder _docBuilder;
+ EmptyDocBuilder _docBuilder;
FixtureBase(uint32_t docIdLimit);
~FixtureBase();
std::shared_ptr<Document> createDoc() {
- return _docBuilder.startDocument(DOC_ID).endDocument();
+ return _docBuilder.make_document(DOC_ID);
}
};
FixtureBase::FixtureBase(uint32_t docIdLimit)
: _handler(docIdLimit),
- _docBuilder(Schema())
+ _docBuilder()
{ }
FixtureBase::~FixtureBase() {}
diff --git a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp
index 2cdf1c45485..f9f98705144 100644
--- a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp
+++ b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp
@@ -5,8 +5,7 @@
namespace proton::test {
UserDocumentsBuilder::UserDocumentsBuilder()
- : _schema(),
- _builder(_schema),
+ : _builder(),
_docs()
{
}
@@ -17,7 +16,7 @@ UserDocumentsBuilder &
UserDocumentsBuilder::createDoc(uint32_t userId, search::DocumentIdT lid)
{
vespalib::string docId = vespalib::make_string("id:test:searchdocument:n=%u:%u", userId, lid);
- document::Document::SP doc(_builder.startDocument(docId).endDocument().release());
+ document::Document::SP doc(_builder.make_document(docId));
_docs.addDoc(userId, Document(doc, lid, storage::spi::Timestamp(lid)));
return *this;
}
diff --git a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h
index f05b6da11de..9e806c8a0bf 100644
--- a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h
+++ b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h
@@ -2,7 +2,7 @@
#pragma once
#include "userdocuments.h"
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/vespalib/util/stringfmt.h>
namespace proton::test {
@@ -13,14 +13,13 @@ namespace proton::test {
class UserDocumentsBuilder
{
private:
- search::index::Schema _schema;
- search::index::DocBuilder _builder;
+ search::index::EmptyDocBuilder _builder;
UserDocuments _docs;
public:
UserDocumentsBuilder();
~UserDocumentsBuilder();
- const std::shared_ptr<const document::DocumentTypeRepo> &getRepo() const {
- return _builder.getDocumentTypeRepo();
+ std::shared_ptr<const document::DocumentTypeRepo> getRepo() const {
+ return _builder.get_repo_sp();
}
UserDocumentsBuilder &createDoc(uint32_t userId, search::DocumentIdT lid);
UserDocumentsBuilder &createDocs(uint32_t userId, search::DocumentIdT begin,
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 62aca6d68cc..c8b3db42340 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -168,9 +168,8 @@ vespa_define_module(
src/tests/grouping
src/tests/groupingengine
src/tests/hitcollector
- src/tests/index/docbuilder
- src/tests/index/doctypebuilder
src/tests/index/field_length_calculator
+ src/tests/index/string_field_builder
src/tests/indexmetainfo
src/tests/ld-library-path
src/tests/memoryindex/compact_words_store
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
index 6e60d14b8ff..8feb7b7e287 100644
--- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
@@ -1,14 +1,20 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/diskindex/fusion.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/common/flush_token.h>
#include <vespa/searchlib/diskindex/diskindex.h>
-#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/diskindex/zcposoccrandread.h>
#include <vespa/searchlib/fef/fieldpositionsiterator.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/index/schemautil.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
#include <vespa/searchlib/memoryindex/document_inverter_context.h>
@@ -31,7 +37,10 @@ LOG_SETUP("fusion_test");
namespace search {
+using document::ArrayFieldValue;
using document::Document;
+using document::StringFieldValue;
+using document::WeightedSetFieldValue;
using fef::FieldPositionsIterator;
using fef::TermFieldMatchData;
using fef::TermFieldMatchDataArray;
@@ -110,26 +119,20 @@ toString(FieldPositionsIterator posItr, bool hasElements = false, bool hasWeight
}
std::unique_ptr<Document>
-make_doc10(DocBuilder &b)
+make_doc10(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- addStr("e").addStr("f").addStr("z").
- endField();
- b.startIndexField("f1").
- addStr("w").addStr("x").
- addStr("y").addStr("z").
- endField();
- b.startIndexField("f2").
- startElement(4).addStr("ax").addStr("ay").addStr("z").endElement().
- startElement(5).addStr("ax").endElement().
- endField();
- b.startIndexField("f3").
- startElement(4).addStr("wx").addStr("z").endElement().
- endField();
-
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StringFieldBuilder sfb(b);
+ doc->setValue("f0", sfb.tokenize("a b c d e f z").build());
+ doc->setValue("f1", sfb.tokenize("w x y z").build());
+ ArrayFieldValue string_array(b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("ax ay z").build());
+ string_array.add(sfb.tokenize("ax").build());
+ doc->setValue("f2", string_array);
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("wx z").build(), 4);
+ doc->setValue("f3", string_wset);
+ return doc;
}
Schema::IndexField
@@ -151,6 +154,18 @@ make_schema(bool interleaved_features)
return schema;
}
+EmptyDocBuilder::AddFieldsType
+make_add_fields()
+{
+ return [](auto& header) { using namespace document::config_builder;
+ using DataType = document::DataType;
+ header.addField("f0", DataType::T_STRING)
+ .addField("f1", DataType::T_STRING)
+ .addField("f2", Array(DataType::T_STRING))
+ .addField("f3", Wset(DataType::T_STRING));
+ };
+}
+
void
assert_interleaved_features(DiskIndex &d, const vespalib::string &field, const vespalib::string &term, uint32_t doc_id, uint32_t exp_num_occs, uint32_t exp_field_length)
{
@@ -327,7 +342,8 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
addField("f2").addField("f3").
addField("f4"));
FieldIndexCollection fic(schema, MockFieldLengthInspector());
- DocBuilder b(schema);
+ EmptyDocBuilder b(make_add_fields());
+ StringFieldBuilder sfb(b);
auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2);
auto pushThreads = SequencedTaskExecutor::create(push_executor, 2);
DocumentInverterContext inv_context(schema, *invertThreads, *pushThreads, fic);
@@ -338,19 +354,21 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
inv.invertDocument(10, *doc, {});
myPushDocument(inv);
- b.startDocument("id:ns:searchdocument::11").
- startIndexField("f3").
- startElement(-27).addStr("zz").endElement().
- endField();
- doc = b.endDocument();
+ doc = b.make_document("id:ns:searchdocument::11");
+ {
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.word("zz").build(), -27);
+ doc->setValue("f3", string_wset);
+ }
inv.invertDocument(11, *doc, {});
myPushDocument(inv);
- b.startDocument("id:ns:searchdocument::12").
- startIndexField("f3").
- startElement(0).addStr("zz0").endElement().
- endField();
- doc = b.endDocument();
+ doc = b.make_document("id:ns:searchdocument::12");
+ {
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.word("zz0").build(), 0);
+ doc->setValue("f3", string_wset);
+ }
inv.invertDocument(12, *doc, {});
myPushDocument(inv);
@@ -468,7 +486,7 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng
FieldIndexCollection fic(_schema, field_length_inspector);
uint32_t numDocs = 20;
uint32_t numWords = 1000;
- DocBuilder b(_schema);
+ EmptyDocBuilder b(make_add_fields());
auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2);
auto pushThreads = SequencedTaskExecutor::create(push_executor, 2);
DocumentInverterContext inv_context(_schema, *invertThreads, *pushThreads, fic);
diff --git a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore
deleted file mode 100644
index 999644fce87..00000000000
--- a/searchlib/src/tests/index/docbuilder/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*_test
-.depend
-Makefile
-docbuilder_test
-searchlib_docbuilder_test_app
diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt
deleted file mode 100644
index 7a969f602ea..00000000000
--- a/searchlib/src/tests/index/docbuilder/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_docbuilder_test_app TEST
- SOURCES
- docbuilder_test.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app)
diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
deleted file mode 100644
index f76b61dcb78..00000000000
--- a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
+++ /dev/null
@@ -1,437 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/log/log.h>
-LOG_SETUP("docbuilder_test");
-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <vespa/searchlib/index/docbuilder.h>
-#include <vespa/vespalib/encoding/base64.h>
-#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <iostream>
-
-using namespace document;
-using search::index::schema::CollectionType;
-
-namespace search::index {
-
-namespace
-{
-std::string empty;
-}
-
-namespace linguistics
-{
-const vespalib::string SPANTREE_NAME("linguistics");
-}
-
-
-TEST("test docBuilder")
-{
- Schema s;
- s.addIndexField(Schema::IndexField("ia", schema::DataType::STRING));
- s.addIndexField(Schema::IndexField("ib", schema::DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("ic", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addUriIndexFields(Schema::IndexField("iu", schema::DataType::STRING));
- s.addUriIndexFields(Schema::IndexField("iau", schema::DataType::STRING, CollectionType::ARRAY));
- s.addUriIndexFields(Schema::IndexField("iwu", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("aa", schema::DataType::INT32));
- s.addAttributeField(Schema::AttributeField("ab", schema::DataType::FLOAT));
- s.addAttributeField(Schema::AttributeField("ac", schema::DataType::STRING));
- s.addAttributeField(Schema::AttributeField("ad", schema::DataType::INT32, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("ae", schema::DataType::FLOAT, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("af", schema::DataType::STRING, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("ag", schema::DataType::INT32, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("ah", schema::DataType::FLOAT, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("ai", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("asp1", schema::DataType::INT32));
- s.addAttributeField(Schema::AttributeField("asp2", schema::DataType::INT64));
- s.addAttributeField(Schema::AttributeField("aap1", schema::DataType::INT32, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("aap2", schema::DataType::INT64, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("awp1", schema::DataType::INT32, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("awp2", schema::DataType::INT64, CollectionType::WEIGHTEDSET));
-
- DocBuilder b(s);
- Document::UP doc;
- std::vector<std::string> lines;
- std::vector<std::string>::const_iterator itr;
- std::string xml;
-
- { // empty
- doc = b.startDocument("id:ns:searchdocument::0").endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::0\"/>", *itr++);
- EXPECT_EQUAL("", *itr++);
- EXPECT_TRUE(itr == lines.end());
- }
- { // all fields set
- std::vector<char> binaryBlob;
- binaryBlob.push_back('\0');
- binaryBlob.push_back('\2');
- binaryBlob.push_back('\1');
- std::string raw1s("Single Raw Element");
- std::string raw1a0("Array Raw Element 0");
- std::string raw1a1("Array Raw Element 1");
- std::string raw1w0("Weighted Set Raw Element 0");
- std::string raw1w1("Weighted Set Raw Element 1");
- raw1s += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1a0 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1a1 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1w0 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1w1 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- b.startDocument("id:ns:searchdocument::1");
- b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField();
- b.startIndexField("ib").startElement().addStr("foo").endElement().
- startElement(1).addStr("bar").addStr("baz").endElement().endField();
- b. startIndexField("ic").
- startElement(20).addStr("bar").addStr("baz").endElement().
- startElement().addStr("foo").endElement().
- endField();
- b.startIndexField("iu").
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("81").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("4").
- endSubField().
- endField();
- b.startIndexField("iau").
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("8").
- endSubField().
- endElement().
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("9").
- endSubField().
- endElement().
- endField();
- b.startIndexField("iwu").
- startElement(4).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("83").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("12").
- endSubField().
- endElement().
- startElement(7).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("85").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("13").
- endSubField().
- endElement().
- endField();
- b.startAttributeField("aa").addInt(2147483647).endField();
- b.startAttributeField("ab").addFloat(1234.56).endField();
- b.startAttributeField("ac").addStr("foo baz").endField();
- b.startAttributeField("ad").startElement().addInt(10).endElement().endField();
- b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField();
- b.startAttributeField("af").startElement().addStr("foo").endElement().endField();
- b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField();
- b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField();
- b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField();
- b.startAttributeField("asp1").addInt(1001).endField();
- b.startAttributeField("asp2").addPosition(1002, 1003).endField();
- b.startAttributeField("aap1").
- startElement().addInt(1004).endElement().
- startElement().addInt(1005).endElement().
- endField();
- b.startAttributeField("aap2").
- startElement().addPosition(1006, 1007).endElement().
- startElement().addPosition(1008, 1009).endElement().
- endField();
- b.startAttributeField("awp1").
- startElement(41).addInt(1010).endElement().
- startElement(42).addInt(1011).endElement().
- endField();
- b.startAttributeField("awp2").
- startElement(43).addPosition(1012, 1013).endElement().
- startElement(44).addPosition(1014, 1015).endElement().
- endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::1\">", *itr++);
- EXPECT_EQUAL("<iu>", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:81/fluke?ab=2#4</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>81</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>4</fragment>", *itr++);
- EXPECT_EQUAL("</iu>", *itr++);
- EXPECT_EQUAL("<aa>2147483647</aa>", *itr++);
- EXPECT_EQUAL("<aap2>", *itr++);
- EXPECT_EQUAL("<item>1047806</item>", *itr++);
- EXPECT_EQUAL("<item>1048322</item>", *itr++);
- EXPECT_EQUAL("</aap2>", *itr++);
- EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++);
- EXPECT_EQUAL("<ae>", *itr++);
- EXPECT_EQUAL("<item>10.5</item>", *itr++);
- EXPECT_EQUAL("</ae>", *itr++);
- EXPECT_EQUAL("<ib>", *itr++);
- EXPECT_EQUAL("<item>foo</item>", *itr++);
- EXPECT_EQUAL("<item>bar baz</item>", *itr++);
- EXPECT_EQUAL("</ib>", *itr++);
- EXPECT_EQUAL("<ah>", *itr++);
- EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++);
- EXPECT_EQUAL("</ah>", *itr++);
- EXPECT_EQUAL("<ic>", *itr++);
- EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++);
- EXPECT_EQUAL("</ic>", *itr++);
- EXPECT_EQUAL("<ac>foo baz</ac>", *itr++);
- EXPECT_EQUAL("<awp2>", *itr++);
- EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++);
- EXPECT_EQUAL("</awp2>", *itr++);
- EXPECT_EQUAL("<iau>", *itr++);
- EXPECT_EQUAL("<item>", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:82/fluke?ab=2#8</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>82</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>8</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("<item>", *itr++);
- EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++);
- EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>82</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>9</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("</iau>", *itr++);
- EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++);
- EXPECT_EQUAL("<ai>", *itr++);
- EXPECT_EQUAL("<item weight=\"4\">bar</item>", *itr++);
- EXPECT_EQUAL("</ai>", *itr++);
- EXPECT_EQUAL("<asp1>1001</asp1>", *itr++);
- EXPECT_EQUAL("<ad>", *itr++);
- EXPECT_EQUAL("<item>10</item>", *itr++);
- EXPECT_EQUAL("</ad>", *itr++);
- EXPECT_EQUAL("<iwu>", *itr++);
- EXPECT_EQUAL("<item weight=\"4\">", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:83/fluke?ab=2#12</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>83</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>12</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"7\">", *itr++);
- EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++);
- EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>85</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>13</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("</iwu>", *itr++);
- EXPECT_EQUAL("<ab>1234.56</ab>", *itr++);
- EXPECT_EQUAL("<ag>", *itr++);
- EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++);
- EXPECT_EQUAL("</ag>", *itr++);
- EXPECT_EQUAL("<awp1>", *itr++);
- EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++);
- EXPECT_EQUAL("</awp1>", *itr++);
- EXPECT_EQUAL("<aap1>", *itr++);
- EXPECT_EQUAL("<item>1004</item>", *itr++);
- EXPECT_EQUAL("<item>1005</item>", *itr++);
- EXPECT_EQUAL("</aap1>", *itr++);
- EXPECT_EQUAL("<af>", *itr++);
- EXPECT_EQUAL("<item>foo</item>", *itr++);
- EXPECT_EQUAL("</af>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
-#if 0
- std::cout << "onedoc xml start -----" << std::endl <<
- xml << std::endl <<
- "-------" << std::endl;
- std::cout << "onedoc toString start ----" << std::endl <<
- doc->toString(true) << std::endl <<
- "-------" << std::endl;
-#endif
- }
- { // create one more to see that everything is cleared
- b.startDocument("id:ns:searchdocument::2");
- b.startIndexField("ia").addStr("yes").endField();
- b.startAttributeField("aa").addInt(20).endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::2\">", *itr++);
- EXPECT_EQUAL("<aa>20</aa>", *itr++);
- EXPECT_EQUAL("<ia>yes</ia>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
- }
- { // create field with cjk chars
- b.startDocument("id:ns:searchdocument::3");
- b.startIndexField("ia").
- addStr("我就是那个").
- setAutoSpace(false).
- addStr("大灰狼").
- setAutoSpace(true).
- endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::3\">", *itr++);
- EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
- const FieldValue::UP iaval = doc->getValue("ia");
- ASSERT_TRUE(iaval.get() != NULL);
- const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *>
- (iaval.get());
- ASSERT_TRUE(iasval != NULL);
- StringFieldValue::SpanTrees trees = iasval->getSpanTrees();
- const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME);
- ASSERT_TRUE(tree != NULL);
- std::vector<Span> spans;
- std::vector<Span> expSpans;
- for (SpanTree::const_iterator i = tree->begin(), ie = tree->end();
- i != ie; ++i) {
- Annotation &ann = const_cast<Annotation &>(*i);
- const Span *span = dynamic_cast<const Span *>(ann.getSpanNode());
- if (span == NULL)
- continue;
- spans.push_back(*span);
- }
- expSpans.push_back(Span(0, 15));
- expSpans.push_back(Span(0, 15));
- expSpans.push_back(Span(15, 9));
- expSpans.push_back(Span(15, 9));
- ASSERT_TRUE(expSpans == spans);
-#if 0
- std::cout << "onedoc xml start -----" << std::endl <<
- xml << std::endl <<
- "-------" << std::endl;
- std::cout << "onedoc toString start ----" << std::endl <<
- doc->toString(true) << std::endl <<
- "-------" << std::endl;
-#endif
- }
-}
-
-TEST("test if index names are valid uri parts") {
- EXPECT_FALSE(UriField::mightBePartofUri("all"));
- EXPECT_FALSE(UriField::mightBePartofUri("fragment"));
- EXPECT_FALSE(UriField::mightBePartofUri(".all"));
- EXPECT_FALSE(UriField::mightBePartofUri("all.b"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.all"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.scheme"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.host"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.port"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.hostname"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.path"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.query"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.fragment"));
-}
-
-}
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore
deleted file mode 100644
index f15be1efcfe..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*_test
-.depend
-Makefile
-doctypebuilder_test
-searchlib_doctypebuilder_test_app
diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
deleted file mode 100644
index 348ecde5a7c..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_doctypebuilder_test_app TEST
- SOURCES
- doctypebuilder_test.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app)
diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
deleted file mode 100644
index 95854fa11b2..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/document/repo/documenttyperepo.h>
-#include <vespa/searchlib/index/doctypebuilder.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/vespalib/testkit/testapp.h>
-
-using namespace document;
-
-namespace search {
-namespace index {
-
-using schema::CollectionType;
-using schema::DataType;
-
-TEST("testSearchDocType") {
- Schema s;
- s.addIndexField(Schema::IndexField("ia", DataType::STRING));
- s.addIndexField(Schema::IndexField("ib", DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("ic", DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addUriIndexFields(Schema::IndexField("iu", DataType::STRING));
- s.addUriIndexFields(Schema::IndexField("iau", DataType::STRING, CollectionType::ARRAY));
- s.addUriIndexFields(Schema::IndexField("iwu", DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("aa", DataType::INT32));
- s.addAttributeField(Schema::AttributeField("spos", DataType::INT64));
- s.addAttributeField(Schema::AttributeField("apos", DataType::INT64, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("wpos", DataType::INT64, CollectionType::WEIGHTEDSET));
-
- DocTypeBuilder docTypeBuilder(s);
- document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig();
- DocumentTypeRepo repo(config);
- const DocumentType *docType = repo.getDocumentType("searchdocument");
- ASSERT_TRUE(docType);
- EXPECT_EQUAL(10u, docType->getFieldCount());
-
- EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName());
- EXPECT_EQUAL("Array<String>",
- docType->getField("ib").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<String>",
- docType->getField("ic").getDataType().getName());
- EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName());
- EXPECT_EQUAL("Array<url>",
- docType->getField("iau").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<url>",
- docType->getField("iwu").getDataType().getName());
-
- EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName());
- EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName());
- EXPECT_EQUAL("Array<Long>",
- docType->getField("apos").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<Long>",
- docType->getField("wpos").getDataType().getName());
-}
-
-TEST("require that multiple fields can have the same type") {
- Schema s;
- s.addIndexField(Schema::IndexField("array1", DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("array2", DataType::STRING, CollectionType::ARRAY));
- DocTypeBuilder docTypeBuilder(s);
- document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig();
- DocumentTypeRepo repo(config);
- const DocumentType *docType = repo.getDocumentType("searchdocument");
- ASSERT_TRUE(docType);
- EXPECT_EQUAL(2u, docType->getFieldCount());
-
- EXPECT_EQUAL("Array<String>",
- docType->getField("array1").getDataType().getName());
- EXPECT_EQUAL("Array<String>",
- docType->getField("array2").getDataType().getName());
-}
-
-} // namespace index
-} // namespace search
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/index/string_field_builder/CMakeLists.txt b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt
new file mode 100644
index 00000000000..f8774eae5ca
--- /dev/null
+++ b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_string_field_builder_test_app TEST
+ SOURCES
+ string_field_builder_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_string_field_builder_test_app COMMAND searchlib_string_field_builder_test_app)
diff --git a/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp
new file mode 100644
index 00000000000..8c2b641f724
--- /dev/null
+++ b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp
@@ -0,0 +1,141 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/index/string_field_builder.h>
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/datatype/annotationtype.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <cassert>
+#include <iostream>
+#include <optional>
+#include <utility>
+#include <vector>
+
+using document::Annotation;
+using document::AnnotationType;
+using document::Span;
+using document::SpanNode;
+using document::SpanTree;
+using document::StringFieldValue;
+using search::index::EmptyDocBuilder;
+using search::index::StringFieldBuilder;
+
+namespace
+{
+
+const vespalib::string SPANTREE_NAME("linguistics");
+
+/**
+ * Flattened, comparable form of one annotation, used as the expectation
+ * type in these tests.
+ *
+ * start and length are filled from Span::from() and Span::length();
+ * label holds the annotation's string field value and stays unset when
+ * the annotation carries no field value.
+ */
+struct MyAnnotation {
+    int32_t start;
+    int32_t length;
+    std::optional<vespalib::string> label;
+
+    // Annotation without a label.
+    MyAnnotation(int32_t start_in, int32_t length_in) noexcept
+        : start(start_in),
+          length(length_in),
+          label()
+    {
+    }
+
+    // Annotation with a label. label_in is a by-value sink parameter, so it
+    // is moved into the member: this avoids a second copy (and whatever
+    // allocation that copy may perform) inside a noexcept constructor.
+    MyAnnotation(int32_t start_in, int32_t length_in, vespalib::string label_in) noexcept
+        : start(start_in),
+          length(length_in),
+          label(std::move(label_in))
+    {
+    }
+
+    bool operator==(const MyAnnotation& rhs) const noexcept;
+};
+
+// Two annotations compare equal when start, length and label all match.
+bool
+MyAnnotation::operator==(const MyAnnotation& rhs) const noexcept
+{
+    if (start != rhs.start) {
+        return false;
+    }
+    if (length != rhs.length) {
+        return false;
+    }
+    return label == rhs.label;
+}
+
+
+// Prints an annotation as [start,length], followed by ("label") when a
+// label is set.
+std::ostream& operator<<(std::ostream& os, const MyAnnotation& ann) {
+    os << "[" << ann.start << "," << ann.length << "]";
+    if (!ann.label) {
+        return os;
+    }
+    os << "(\"" << *ann.label << "\")";
+    return os;
+}
+
+}
+
+// Test fixture holding a StringFieldBuilder (sfb) constructed from an
+// EmptyDocBuilder (edb), plus helpers for checking the annotations of a
+// StringFieldValue.
+class StringFieldBuilderTest : public testing::Test
+{
+protected:
+ // edb is declared before sfb: the constructor initializes sfb from edb.
+ EmptyDocBuilder edb;
+ StringFieldBuilder sfb;
+ StringFieldBuilderTest();
+ ~StringFieldBuilderTest();
+ // Returns the Span-based annotations found in val's SPANTREE_NAME span tree.
+ std::vector<MyAnnotation> get_annotations(const StringFieldValue& val);
+ // Expects val to carry the annotations in exp and to have plain as its text.
+ void assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val);
+};
+
+// Default-constructs the doc builder and hands it to the string field builder.
+StringFieldBuilderTest::StringFieldBuilderTest()
+ : testing::Test(),
+ edb(),
+ sfb(edb)
+{
+}
+
+// Defined out of line, after the class declaration.
+StringFieldBuilderTest::~StringFieldBuilderTest() = default;
+
+// Collects one MyAnnotation for every annotation in val's SPANTREE_NAME span
+// tree whose span node is a Span; annotations on other node kinds are
+// skipped. The label is filled in only when the annotation carries a field
+// value. Returns an empty vector when val has no such span tree.
+std::vector<MyAnnotation>
+StringFieldBuilderTest::get_annotations(const StringFieldValue& val)
+{
+    std::vector<MyAnnotation> collected;
+    // NOTE(review): tree presumably points into trees, so trees is kept as a
+    // named local for the whole function.
+    StringFieldValue::SpanTrees trees = val.getSpanTrees();
+    const auto* tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
+    if (tree == nullptr) {
+        return collected;
+    }
+    for (auto& ann : *tree) {
+        assert(ann.getType() == *AnnotationType::TERM);
+        auto span = dynamic_cast<const Span *>(ann.getSpanNode());
+        if (span != nullptr) {
+            auto ann_fv = ann.getFieldValue();
+            if (ann_fv != nullptr) {
+                collected.emplace_back(span->from(), span->length(), dynamic_cast<const StringFieldValue &>(*ann_fv).getValue());
+            } else {
+                collected.emplace_back(span->from(), span->length());
+            }
+        }
+    }
+    return collected;
+}
+
+// Checks that val carries exactly the annotations in exp and that its text
+// equals plain. Both checks use EXPECT_EQ, so a mismatch is reported without
+// aborting the calling test.
+void
+StringFieldBuilderTest::assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val)
+{
+ EXPECT_EQ(exp, get_annotations(val));
+ EXPECT_EQ(plain, val.getValue());
+}
+
+// A StringFieldValue constructed directly from text is expected to yield no
+// annotations.
+TEST_F(StringFieldBuilderTest, no_annotations)
+{
+ assert_annotations({}, "foo", StringFieldValue("foo"));
+}
+
+// word("word") is expected to give the text "word" with a single annotation
+// at start 0, length 4.
+TEST_F(StringFieldBuilderTest, single_word)
+{
+ assert_annotations({{0, 4}}, "word", sfb.word("word").build());
+}
+
+// tokenize() is expected to annotate each space-separated word, given as
+// {start, length}: "this" {0,4}, "is" {5,2}, "a" {8,1}, "test" {10,4}.
+TEST_F(StringFieldBuilderTest, tokenize)
+{
+ assert_annotations({{0, 4}, {5, 2}, {8, 1}, {10, 4}}, "this is a test", sfb.tokenize("this is a test").build());
+}
+
+// alt_word("baz") is expected to add a labelled annotation over the same
+// span as the preceding word ("bar": start 4, length 3) while leaving the
+// plain text as "foo bar".
+TEST_F(StringFieldBuilderTest, alt_word)
+{
+ assert_annotations({{0, 3}, {4, 3}, {4, 3, "baz"}}, "foo bar", sfb.word("foo").space().word("bar").alt_word("baz").build());
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
index 3f8a04d9460..83746b611fb 100644
--- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
@@ -1,8 +1,13 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/searchlib/index/docbuilder.h>
-#include <vespa/searchlib/index/field_length_calculator.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
+#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
+#include <vespa/searchlib/index/field_length_calculator.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_remover.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
@@ -19,9 +24,10 @@
namespace search::memoryindex {
using document::Document;
-using index::DocBuilder;
+using index::EmptyDocBuilder;
using index::FieldLengthCalculator;
using index::Schema;
+using index::StringFieldBuilder;
using index::schema::CollectionType;
using index::schema::DataType;
using vespalib::SequencedTaskExecutor;
@@ -29,64 +35,68 @@ using vespalib::ISequencedTaskExecutor;
namespace {
+// Returns the callback given to EmptyDocBuilder. It declares four fields on
+// the header it receives: f0 and f1 (string), f2 (array of string) and
+// f3 (weighted set of string).
+EmptyDocBuilder::AddFieldsType
+make_add_fields()
+{
+    auto add_fields = [](auto& hdr) {
+        using namespace document::config_builder;
+        // Local alias so DataType names document::DataType inside the lambda.
+        using DataType = document::DataType;
+        hdr.addField("f0", DataType::T_STRING)
+            .addField("f1", DataType::T_STRING)
+            .addField("f2", Array(DataType::T_STRING))
+            .addField("f3", Wset(DataType::T_STRING));
+    };
+    return add_fields;
+}
+
+// Builds document id:ns:searchdocument::10 with f0 tokenized from "a b c d".
Document::UP
-makeDoc10(DocBuilder &b)
+makeDoc10(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.tokenize("a b c d").build());
+ return doc;
}
+// Builds document id:ns:searchdocument::11 with f0 tokenized from "a b e f"
+// and f1 tokenized from "a g".
Document::UP
-makeDoc11(DocBuilder &b)
+makeDoc11(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::11");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("e").addStr("f").
- endField();
- b.startIndexField("f1").
- addStr("a").addStr("g").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::11");
+ doc->setValue("f0", sfb.tokenize("a b e f").build());
+ doc->setValue("f1", sfb.tokenize("a g").build());
+ return doc;
}
+// Builds document id:ns:searchdocument::12 with f0 tokenized from "h doc12".
Document::UP
-makeDoc12(DocBuilder &b)
+makeDoc12(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::12");
- b.startIndexField("f0").
- addStr("h").addStr("doc12").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::12");
+ doc->setValue("f0", sfb.tokenize("h doc12").build());
+ return doc;
}
+// Builds document id:ns:searchdocument::13 with f0 tokenized from "i doc13".
Document::UP
-makeDoc13(DocBuilder &b)
+makeDoc13(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::13");
- b.startIndexField("f0").
- addStr("i").addStr("doc13").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::13");
+ doc->setValue("f0", sfb.tokenize("i doc13").build());
+ return doc;
}
+// Builds document id:ns:searchdocument::14 with f0 tokenized from "j doc14".
Document::UP
-makeDoc14(DocBuilder &b)
+makeDoc14(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::14");
- b.startIndexField("f0").
- addStr("j").addStr("doc14").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::14");
+ doc->setValue("f0", sfb.tokenize("j doc14").build());
+ return doc;
}
+// Builds document id:ns:searchdocument::15 without setting any field values.
Document::UP
-makeDoc15(DocBuilder &b)
+makeDoc15(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::15");
- return b.endDocument();
+ return b.make_document("id:ns:searchdocument::15");
}
}
@@ -96,7 +106,7 @@ VESPA_THREAD_STACK_TAG(push_executor)
struct DocumentInverterTest : public ::testing::Test {
Schema _schema;
- DocBuilder _b;
+ EmptyDocBuilder _b;
std::unique_ptr<ISequencedTaskExecutor> _invertThreads;
std::unique_ptr<ISequencedTaskExecutor> _pushThreads;
WordStore _word_store;
@@ -118,7 +128,7 @@ struct DocumentInverterTest : public ::testing::Test {
DocumentInverterTest()
: _schema(makeSchema()),
- _b(_schema),
+ _b(make_add_fields()),
_invertThreads(SequencedTaskExecutor::create(invert_executor, 1)),
_pushThreads(SequencedTaskExecutor::create(push_executor, 1)),
_word_store(),
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index dcca1f136f6..04d1f08db6f 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -1,13 +1,22 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/structfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/diskindex/zcposoccrandread.h>
#include <vespa/searchlib/fef/fieldpositionsiterator.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_collection.h>
@@ -37,7 +46,11 @@ namespace search {
using namespace fef;
using namespace index;
+using document::ArrayFieldValue;
using document::Document;
+using document::StructFieldValue;
+using document::UrlDataType;
+using document::WeightedSetFieldValue;
using queryeval::RankedSearchIteratorBase;
using queryeval::SearchIterator;
using search::index::schema::CollectionType;
@@ -505,6 +518,12 @@ make_single_field_schema()
return result;
}
+// Returns the EmptyDocBuilder callback that declares the single string
+// field f0.
+EmptyDocBuilder::AddFieldsType
+make_single_add_fields()
+{
+    auto add_fields = [](auto& hdr) {
+        hdr.addField("f0", document::DataType::T_STRING);
+    };
+    return add_fields;
+}
+
template <typename FieldIndexType>
struct FieldIndexTest : public ::testing::Test {
Schema schema;
@@ -706,6 +725,18 @@ make_multi_field_schema()
return result;
}
+// Returns the EmptyDocBuilder callback for the multi-field tests. It declares
+// f0 and f1 (string), f2 (array of string) and f3 (weighted set of string).
+EmptyDocBuilder::AddFieldsType
+make_multi_field_add_fields()
+{
+    auto add_fields = [](auto& hdr) {
+        using namespace document::config_builder;
+        // Local alias so DataType names document::DataType inside the lambda.
+        using DataType = document::DataType;
+        hdr.addField("f0", DataType::T_STRING)
+            .addField("f1", DataType::T_STRING)
+            .addField("f2", Array(DataType::T_STRING))
+            .addField("f3", Wset(DataType::T_STRING));
+    };
+    return add_fields;
+}
+
struct FieldIndexCollectionTest : public ::testing::Test {
Schema schema;
FieldIndexCollection fic;
@@ -907,16 +938,16 @@ class InverterTest : public ::testing::Test {
public:
Schema _schema;
FieldIndexCollection _fic;
- DocBuilder _b;
+ EmptyDocBuilder _b;
std::unique_ptr<ISequencedTaskExecutor> _invertThreads;
std::unique_ptr<ISequencedTaskExecutor> _pushThreads;
DocumentInverterContext _inv_context;
DocumentInverter _inv;
- InverterTest(const Schema& schema)
+ InverterTest(const Schema& schema, EmptyDocBuilder::AddFieldsType add_fields)
: _schema(schema),
_fic(_schema, MockFieldLengthInspector()),
- _b(_schema),
+ _b(add_fields),
_invertThreads(SequencedTaskExecutor::create(invert_executor, 2)),
_pushThreads(SequencedTaskExecutor::create(push_executor, 2)),
_inv_context(_schema, *_invertThreads, *_pushThreads, _fic),
@@ -938,91 +969,63 @@ public:
+// InverterTest fixture set up with make_multi_field_schema() as the schema
+// and make_multi_field_add_fields() as the add-fields callback.
class BasicInverterTest : public InverterTest {
public:
- BasicInverterTest() : InverterTest(make_multi_field_schema()) {}
+ BasicInverterTest() : InverterTest(make_multi_field_schema(), make_multi_field_add_fields()) {}
};
TEST_F(BasicInverterTest, require_that_inversion_is_working)
{
Document::UP doc;
+ StringFieldBuilder sfb(_b);
- _b.startDocument("id:ns:searchdocument::10");
- _b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.tokenize("a b c d").build());
_inv.invertDocument(10, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::20");
- _b.startIndexField("f0").
- addStr("a").addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::20");
+ doc->setValue("f0", sfb.tokenize("a a b c d").build());
_inv.invertDocument(20, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::30");
- _b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- addStr("e").addStr("f").
- endField();
- _b.startIndexField("f1").
- addStr("\nw2").addStr("w").addStr("x").
- addStr("\nw3").addStr("y").addStr("z").
- endField();
- _b.startIndexField("f2").
- startElement(4).
- addStr("w").addStr("x").
- endElement().
- startElement(5).
- addStr("y").addStr("z").
- endElement().
- endField();
- _b.startIndexField("f3").
- startElement(6).
- addStr("w").addStr("x").
- endElement().
- startElement(7).
- addStr("y").addStr("z").
- endElement().
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::30");
+ doc->setValue("f0", sfb.tokenize("a b c d e f").build());
+ doc->setValue("f1", sfb.word("\nw2").tokenize(" w x ").
+ word("\nw3").tokenize(" y z").build());
+ {
+ ArrayFieldValue string_array(_b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("w x").build());
+ string_array.add(sfb.tokenize("y z").build());
+ doc->setValue("f2", string_array);
+ }
+ {
+ WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("w x").build(), 6);
+ string_wset.add(sfb.tokenize("y z").build(), 7);
+ doc->setValue("f3", string_wset);
+ }
_inv.invertDocument(30, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::40");
- _b.startIndexField("f0").
- addStr("a").addStr("a").addStr("b").addStr("c").addStr("a").
- addStr("e").addStr("f").
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::40");
+ doc->setValue("f0", sfb.tokenize("a a b c a e f").build());
_inv.invertDocument(40, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::999");
- _b.startIndexField("f0").
- addStr("this").addStr("is").addStr("_a_").addStr("test").
- addStr("for").addStr("insertion").addStr("speed").addStr("with").
- addStr("more").addStr("than").addStr("just").addStr("__a__").
- addStr("few").addStr("words").addStr("present").addStr("in").
- addStr("some").addStr("of").addStr("the").addStr("fields").
- endField();
- _b.startIndexField("f1").
- addStr("the").addStr("other").addStr("field").addStr("also").
- addStr("has").addStr("some").addStr("content").
- endField();
- _b.startIndexField("f2").
- startElement(1).
- addStr("strange").addStr("things").addStr("here").
- addStr("has").addStr("some").addStr("content").
- endElement().
- endField();
- _b.startIndexField("f3").
- startElement(3).
- addStr("not").addStr("a").addStr("weighty").addStr("argument").
- endElement().
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::999");
+ doc->setValue("f0", sfb.tokenize("this is ").word("_a_").
+ tokenize(" test for insertion speed with more than just ").
+ word("__a__").tokenize(" few words present in some of the fields").build());
+ doc->setValue("f1", sfb.tokenize("the other field also has some content").build());
+ {
+ ArrayFieldValue string_array(_b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("strange things here has some content").build());
+ doc->setValue("f2", string_array);
+ }
+ {
+ WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("not a weighty argument").build(), 3);
+ doc->setValue("f3", string_wset);
+ }
for (uint32_t docId = 10000; docId < 20000; ++docId) {
_inv.invertDocument(docId, *doc, {});
myPushDocument(_inv);
@@ -1132,19 +1135,17 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working)
TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remover)
{
- Document::UP doc;
+ StringFieldBuilder sfb(_b);
- _b.startDocument("id:ns:searchdocument::1");
- _b.startIndexField("f0").addStr("a").addStr("b").endField();
- _b.startIndexField("f1").addStr("a").addStr("c").endField();
- Document::UP doc1 = _b.endDocument();
- _inv.invertDocument(1, *doc1.get(), {});
+ auto doc1 = _b.make_document("id:ns:searchdocument::1");
+ doc1->setValue("f0", sfb.tokenize("a b").build());
+ doc1->setValue("f1", sfb.tokenize("a c").build());
+ _inv.invertDocument(1, *doc1, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::2");
- _b.startIndexField("f0").addStr("b").addStr("c").endField();
- Document::UP doc2 = _b.endDocument();
- _inv.invertDocument(2, *doc2.get(), {});
+ auto doc2 = _b.make_document("id:ns:searchdocument::2");
+ doc2->setValue("f0", sfb.tokenize("b c").build());
+ _inv.invertDocument(2, *doc2, {});
myPushDocument(_inv);
EXPECT_TRUE(assertPostingList("[1]", find("a", 0)));
@@ -1172,136 +1173,71 @@ make_uri_schema()
return result;
}
+// Returns the EmptyDocBuilder callback for the URI tests. It declares iu
+// (url), iau (array of url) and iwu (weighted set of url), all based on the
+// id of the UrlDataType instance.
+EmptyDocBuilder::AddFieldsType
+make_uri_add_fields()
+{
+    auto add_fields = [](auto& hdr) {
+        using namespace document::config_builder;
+        const auto& url_type = UrlDataType::getInstance();
+        hdr.addField("iu", url_type.getId())
+            .addField("iau", Array(url_type.getId()))
+            .addField("iwu", Wset(url_type.getId()));
+    };
+    return add_fields;
+}
+
+// InverterTest fixture set up with make_uri_schema() as the schema and
+// make_uri_add_fields() as the add-fields callback.
class UriInverterTest : public InverterTest {
public:
- UriInverterTest() : InverterTest(make_uri_schema()) {}
+ UriInverterTest() : InverterTest(make_uri_schema(), make_uri_add_fields()) {}
};
TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
{
Document::UP doc;
-
- _b.startDocument("id:ns:searchdocument::10");
- _b.startIndexField("iu").
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("81").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("4").
- endSubField().
- endField();
- _b.startIndexField("iau").
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("8").
- endSubField().
- endElement().
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("9").
- endSubField().
- endElement().
- endField();
- _b.startIndexField("iwu").
- startElement(4).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("83").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("12").
- endSubField().
- endElement().
- startElement(7).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("85").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("13").
- endSubField().
- endElement().
- endField();
- doc = _b.endDocument();
+ StringFieldBuilder sfb(_b);
+ sfb.url_mode(true);
+ StructFieldValue url_value(_b.get_data_type("url"));
+
+ doc = _b.make_document("id:ns:searchdocument::10");
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("81").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("4").build());
+ doc->setValue("iu", url_value);
+ ArrayFieldValue url_array(_b.get_data_type("Array<url>"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("82").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("8").build());
+ url_array.add(url_value);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("fragment", sfb.tokenize("9").build());
+ url_array.add(url_value);
+ doc->setValue("iau", url_array);
+ WeightedSetFieldValue url_wset(_b.get_data_type("WeightedSet<url>"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("83").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("12").build());
+ url_wset.add(url_value, 4);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("port", sfb.tokenize("85").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("13").build());
+ url_wset.add(url_value, 7);
+ doc->setValue("iwu", url_wset);
_inv.invertDocument(10, *doc, {});
myPushDocument(_inv);
@@ -1360,21 +1296,16 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
+// InverterTest fixture set up with make_single_field_schema() as the schema
+// and make_single_add_fields() as the add-fields callback.
class CjkInverterTest : public InverterTest {
public:
- CjkInverterTest() : InverterTest(make_single_field_schema()) {}
+ CjkInverterTest() : InverterTest(make_single_field_schema(), make_single_add_fields()) {}
};
TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working)
{
Document::UP doc;
+ StringFieldBuilder sfb(_b);
- _b.startDocument("id:ns:searchdocument::10");
- _b.startIndexField("f0").
- addStr("我就是那个").
- setAutoSpace(false).
- addStr("大灰狼").
- setAutoSpace(true).
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.word("我就是那个").word("大灰狼").build());
_inv.invertDocument(10, *doc, {});
myPushDocument(_inv);
diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
index ed049a82c42..bf3a911a579 100644
--- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
@@ -1,8 +1,14 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/field_length_calculator.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/field_index_remover.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/memoryindex/word_store.h>
@@ -13,9 +19,12 @@
namespace search {
+using document::ArrayFieldValue;
using document::Document;
-using index::DocBuilder;
+using document::WeightedSetFieldValue;
+using index::EmptyDocBuilder;
using index::Schema;
+using index::StringFieldBuilder;
using index::schema::CollectionType;
using index::schema::DataType;
@@ -26,93 +35,91 @@ namespace memoryindex {
namespace {
Document::UP
-makeDoc10(DocBuilder &b)
+makeDoc10(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.tokenize("a b c d").build());
+ return doc;
}
Document::UP
-makeDoc11(DocBuilder &b)
+makeDoc11(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::11");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("e").addStr("f").
- endField();
- b.startIndexField("f1").
- addStr("a").addStr("g").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::11");
+ doc->setValue("f0", sfb.tokenize("a b e f").build());
+ doc->setValue("f1", sfb.tokenize("a g").build());
+ return doc;
}
Document::UP
-makeDoc12(DocBuilder &b)
+makeDoc12(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::12");
- b.startIndexField("f0").
- addStr("h").addStr("doc12").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::12");
+ doc->setValue("f0", sfb.tokenize("h doc12").build());
+ return doc;
}
Document::UP
-makeDoc13(DocBuilder &b)
+makeDoc13(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::13");
- b.startIndexField("f0").
- addStr("i").addStr("doc13").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::13");
+ doc->setValue("f0", sfb.tokenize("i doc13").build());
+ return doc;
}
Document::UP
-makeDoc14(DocBuilder &b)
+makeDoc14(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::14");
- b.startIndexField("f0").
- addStr("j").addStr("doc14").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::14");
+ doc->setValue("f0", sfb.tokenize("j doc14").build());
+ return doc;
}
Document::UP
-makeDoc15(DocBuilder &b)
+makeDoc15(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::15");
- return b.endDocument();
+ return b.make_document("id:ns:searchdocument::15");
}
Document::UP
-makeDoc16(DocBuilder &b)
+makeDoc16(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::16");
- b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz").
- addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty").
- addStr("z").endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::16");
+ doc->setValue("f0", sfb.tokenize("foo bar baz").alt_word("altbaz").tokenize(" y").alt_word("alty").tokenize(" z").build());
+ return doc;
}
Document::UP
-makeDoc17(DocBuilder &b)
+makeDoc17(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::17");
- b.startIndexField("f1").addStr("foo0").addStr("bar0").endField();
- b.startIndexField("f2").startElement(1).addStr("foo").addStr("bar").endElement().startElement(1).addStr("bar").endElement().endField();
- b.startIndexField("f3").startElement(3).addStr("foo2").addStr("bar2").endElement().startElement(4).addStr("bar2").endElement().endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::17");
+ doc->setValue("f1", sfb.tokenize("foo0 bar0").build());
+ ArrayFieldValue string_array(b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("foo bar").build());
+ string_array.add(sfb.tokenize("bar").build());
+ doc->setValue("f2", string_array);
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("foo2 bar2").build(), 3);
+ string_wset.add(sfb.tokenize("bar2").build(), 4);
+ doc->setValue("f3", string_wset);
+ return doc;
}
vespalib::string corruptWord = "corruptWord";
Document::UP
-makeCorruptDocument(DocBuilder &b, size_t wordOffset)
+makeCorruptDocument(EmptyDocBuilder &b, size_t wordOffset)
{
- b.startDocument("id:ns:searchdocument::18");
- b.startIndexField("f0").addStr("before").addStr(corruptWord).addStr("after").addStr("z").endField();
- auto doc = b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::18");
+ doc->setValue("f0", sfb.tokenize("before ").word(corruptWord).tokenize(" after z").build());
vespalib::nbostream stream;
doc->serialize(stream);
std::vector<char> raw;
@@ -127,14 +134,14 @@ makeCorruptDocument(DocBuilder &b, size_t wordOffset)
}
vespalib::nbostream badstream;
badstream.write(&raw[0], raw.size());
- return std::make_unique<Document>(*b.getDocumentTypeRepo(), badstream);
+ return std::make_unique<Document>(b.get_repo(), badstream);
}
}
struct FieldInverterTest : public ::testing::Test {
Schema _schema;
- DocBuilder _b;
+ EmptyDocBuilder _b;
WordStore _word_store;
FieldIndexRemover _remover;
test::OrderedFieldIndexInserterBackend _inserter_backend;
@@ -151,9 +158,21 @@ struct FieldInverterTest : public ::testing::Test {
return schema;
}
+ static EmptyDocBuilder::AddFieldsType
+ make_add_fields()
+ {
+ return [](auto& header) { using namespace document::config_builder;
+ using DataType = document::DataType;
+ header.addField("f0", DataType::T_STRING)
+ .addField("f1", DataType::T_STRING)
+ .addField("f2", Array(DataType::T_STRING))
+ .addField("f3", Wset(DataType::T_STRING));
+ };
+ }
+
FieldInverterTest()
: _schema(makeSchema()),
- _b(_schema),
+ _b(make_add_fields()),
_word_store(),
_remover(_word_store),
_inserter_backend(),
diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
index b3ea948dfa7..1730e34adb5 100644
--- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
@@ -1,11 +1,15 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/common/scheduletaskcallback.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/i_field_length_inspector.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/memory_index.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
@@ -59,6 +63,12 @@ struct MySetup : public IFieldLengthInspector {
}
return FieldLengthInfo();
}
+ void add_fields(document::config_builder::Struct& header) const {
+ for (uint32_t i = 0; i < schema.getNumIndexFields(); ++i) {
+ auto& field = schema.getIndexField(i);
+ header.addField(field.getName(), document::DataType::T_STRING);
+ }
+ }
};
@@ -70,31 +80,38 @@ struct Index {
std::unique_ptr<ISequencedTaskExecutor> _invertThreads;
std::unique_ptr<ISequencedTaskExecutor> _pushThreads;
MemoryIndex index;
- DocBuilder builder;
+ EmptyDocBuilder builder;
+ StringFieldBuilder sfb;
+ std::unique_ptr<Document> builder_doc;
uint32_t docid;
std::string currentField;
+ bool add_space;
Index(const MySetup &setup);
~Index();
void closeField() {
if (!currentField.empty()) {
- builder.endField();
+ builder_doc->setValue(currentField, sfb.build());
currentField.clear();
}
}
Index &doc(uint32_t id) {
docid = id;
- builder.startDocument(vespalib::make_string("id:ns:searchdocument::%u", id));
+ builder_doc = builder.make_document(vespalib::make_string("id:ns:searchdocument::%u", id));
return *this;
}
Index &field(const std::string &name) {
closeField();
- builder.startIndexField(name);
currentField = name;
+ add_space = false;
return *this;
}
Index &add(const std::string &token) {
- builder.addStr(token);
+ if (add_space) {
+ sfb.space();
+ }
+ add_space = true;
+ sfb.word(token);
return *this;
}
void internalSyncCommit() {
@@ -106,7 +123,7 @@ struct Index {
}
Document::UP commit() {
closeField();
- Document::UP d = builder.endDocument();
+ Document::UP d = std::move(builder_doc);
index.insertDocument(docid, *d, {});
internalSyncCommit();
return d;
@@ -133,9 +150,12 @@ Index::Index(const MySetup &setup)
_invertThreads(SequencedTaskExecutor::create(invert_executor, 2)),
_pushThreads(SequencedTaskExecutor::create(push_executor, 2)),
index(schema, setup, *_invertThreads, *_pushThreads),
- builder(schema),
+ builder([&setup](auto& header) { setup.add_fields(header); }),
+ sfb(builder),
+ builder_doc(),
docid(1),
- currentField()
+ currentField(),
+ add_space(false)
{
}
Index::~Index() = default;
diff --git a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
index 969f483eef6..3995f06628c 100644
--- a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
@@ -1,11 +1,21 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/memoryindex/url_field_inverter.h>
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/structfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/document/repo/fixedtyperepo.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/field_length_calculator.h>
+#include <vespa/searchlib/index/schema_index_fields.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/field_index_remover.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
-#include <vespa/searchlib/memoryindex/url_field_inverter.h>
#include <vespa/searchlib/memoryindex/word_store.h>
#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h>
#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter_backend.h>
@@ -14,6 +24,10 @@
namespace search {
using document::Document;
+using document::ArrayFieldValue;
+using document::StructFieldValue;
+using document::UrlDataType;
+using document::WeightedSetFieldValue;
using index::schema::CollectionType;
using index::schema::DataType;
@@ -26,160 +40,88 @@ namespace {
const vespalib::string url = "url";
Document::UP
-makeDoc10Single(DocBuilder &b)
+makeDoc10Single(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("url").
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("81").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- addTermAnnotation("altfluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("4").
- endSubField().
- endField();
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StructFieldValue url_value(b.get_data_type("url"));
+ StringFieldBuilder sfb(b);
+ sfb.url_mode(true);
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("81").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("4").build());
+ doc->setValue("url", url_value);
+ return doc;
}
Document::UP
-makeDoc10Array(DocBuilder &b)
+makeDoc10Array(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("url").
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- addTermAnnotation("altfluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("8").
- endSubField().
- endElement().
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("9").
- endSubField().
- endElement().
- endField();
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StringFieldBuilder sfb(b);
+ sfb.url_mode(true);
+ ArrayFieldValue url_array(b.get_data_type("Array<url>"));
+ StructFieldValue url_value(b.get_data_type("url"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("82").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("8").build());
+ url_array.add(url_value);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("fragment", sfb.tokenize("9").build());
+ url_array.add(url_value);
+ doc->setValue("url", url_array);
+ return doc;
}
Document::UP
-makeDoc10WeightedSet(DocBuilder &b)
+makeDoc10WeightedSet(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("url").
- startElement(4).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("83").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- addTermAnnotation("altfluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("12").
- endSubField().
- endElement().
- startElement(7).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("85").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("13").
- endSubField().
- endElement().
- endField();
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StringFieldBuilder sfb(b);
+ sfb.url_mode(true);
+ WeightedSetFieldValue url_wset(b.get_data_type("WeightedSet<url>"));
+ StructFieldValue url_value(b.get_data_type("url"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("83").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("12").build());
+ url_wset.add(url_value, 4);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("port", sfb.tokenize("85").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("13").build());
+ url_wset.add(url_value, 7);
+ doc->setValue("url", url_wset);
+ return doc;
}
Document::UP
-makeDoc10Empty(DocBuilder &b)
+makeDoc10Empty(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- return b.endDocument();
+ return b.make_document("id:ns:searchdocument::10");
}
}
struct UrlFieldInverterTest : public ::testing::Test {
Schema _schema;
- DocBuilder _b;
+ EmptyDocBuilder _b;
WordStore _word_store;
FieldIndexRemover _remover;
test::OrderedFieldIndexInserterBackend _inserter_backend;
@@ -195,9 +137,10 @@ struct UrlFieldInverterTest : public ::testing::Test {
return schema;
}
- UrlFieldInverterTest(Schema::CollectionType collectionType)
+ UrlFieldInverterTest(Schema::CollectionType collectionType,
+ EmptyDocBuilder::AddFieldsType add_fields)
: _schema(makeSchema(collectionType)),
- _b(_schema),
+ _b(add_fields),
_word_store(),
_remover(_word_store),
_inserter_backend(),
@@ -250,16 +193,32 @@ struct UrlFieldInverterTest : public ::testing::Test {
UrlFieldInverterTest::~UrlFieldInverterTest() = default;
+EmptyDocBuilder::AddFieldsType
+add_single_url = [](auto& header) {
+ header.addField("url", UrlDataType::getInstance().getId()); };
+
+EmptyDocBuilder::AddFieldsType
+add_array_url = [](auto& header) {
+ using namespace document::config_builder;
+ header.addField("url", Array(UrlDataType::getInstance().getId())); };
+
+EmptyDocBuilder::AddFieldsType
+add_wset_url = [](auto& header) {
+ using namespace document::config_builder;
+ header.addField("url", Wset(UrlDataType::getInstance().getId())); };
+
+
+
struct SingleInverterTest : public UrlFieldInverterTest {
- SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE) {}
+ SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE, add_single_url) {}
};
struct ArrayInverterTest : public UrlFieldInverterTest {
- ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY) {}
+ ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY, add_array_url) {}
};
struct WeightedSetInverterTest : public UrlFieldInverterTest {
- WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET) {}
+ WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET, add_wset_url) {}
};
diff --git a/searchlib/src/vespa/searchlib/index/CMakeLists.txt b/searchlib/src/vespa/searchlib/index/CMakeLists.txt
index 958614844d1..afeb020598b 100644
--- a/searchlib/src/vespa/searchlib/index/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/index/CMakeLists.txt
@@ -2,9 +2,7 @@
vespa_add_library(searchlib_searchlib_index OBJECT
SOURCES
dictionaryfile.cpp
- docbuilder.cpp
docidandfeatures.cpp
- doctypebuilder.cpp
dummyfileheadercontext.cpp
empty_doc_builder.cpp
indexbuilder.cpp
@@ -15,6 +13,7 @@ vespa_add_library(searchlib_searchlib_index OBJECT
postinglistparams.cpp
schemautil.cpp
schema_index_fields.cpp
+ string_field_builder.cpp
uri_field.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.cpp b/searchlib/src/vespa/searchlib/index/docbuilder.cpp
deleted file mode 100644
index d6169f2f396..00000000000
--- a/searchlib/src/vespa/searchlib/index/docbuilder.cpp
+++ /dev/null
@@ -1,814 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "docbuilder.h"
-#include <vespa/document/datatype/urldatatype.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/document/repo/documenttyperepo.h>
-#include <vespa/fastlib/text/unicodeutil.h>
-#include <vespa/vespalib/geo/zcurve.h>
-#include <vespa/vespalib/text/utf8.h>
-#include <vespa/eval/eval/value.h>
-#include <vespa/vespalib/data/slime/slime.h>
-
-using namespace document;
-using namespace search::index;
-
-using search::index::schema::CollectionType;
-using vespalib::Utf8Reader;
-using vespalib::Utf8Writer;
-using vespalib::geo::ZCurve;
-
-namespace {
-
-void
-insertStr(const Schema::Field & sfield, document::FieldValue * fvalue, const vespalib::string & val)
-{
- if (sfield.getDataType() == schema::DataType::STRING ||
- sfield.getDataType() == schema::DataType::RAW)
- {
- (dynamic_cast<LiteralFieldValueB *>(fvalue))->setValue(val);
- } else {
- throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str()));
- }
-}
-
-void
-insertInt(const Schema::Field & sfield, document::FieldValue * fvalue, int64_t val)
-{
- if (sfield.getDataType() == schema::DataType::INT8) {
- (dynamic_cast<ByteFieldValue *>(fvalue))->setValue((uint8_t)val);
- } else if (sfield.getDataType() == schema::DataType::INT16) {
- (dynamic_cast<ShortFieldValue *>(fvalue))->setValue((int16_t)val);
- } else if (sfield.getDataType() == schema::DataType::INT32) {
- (dynamic_cast<IntFieldValue *>(fvalue))->setValue((int32_t)val);
- } else if (sfield.getDataType() == schema::DataType::INT64) {
- (dynamic_cast<LongFieldValue *>(fvalue))->setValue(val);
- } else {
- throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str()));
- }
-}
-
-void
-insertFloat(const Schema::Field & sfield, document::FieldValue * fvalue, double val)
-{
- if (sfield.getDataType() == schema::DataType::FLOAT) {
- (dynamic_cast<FloatFieldValue *>(fvalue))->setValue((float)val);
- } else if (sfield.getDataType() == schema::DataType::DOUBLE) {
- (dynamic_cast<DoubleFieldValue *>(fvalue))->setValue(val);
- } else {
- throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str()));
- }
-}
-
-void insertPredicate(const Schema::Field &sfield,
- document::FieldValue *fvalue,
- std::unique_ptr<vespalib::Slime> val) {
- if (sfield.getDataType() == schema::DataType::BOOLEANTREE) {
- *(dynamic_cast<PredicateFieldValue *>(fvalue)) =
- PredicateFieldValue(std::move(val));
- } else {
- throw DocBuilder::Error(vespalib::make_string(
- "Field '%s' not compatible",
- sfield.getName().c_str()));
- }
-}
-
-void insertTensor(const Schema::Field &schemaField,
- document::FieldValue *fvalue,
- std::unique_ptr<vespalib::eval::Value> val) {
- if (schemaField.getDataType() == schema::DataType::TENSOR) {
- *(dynamic_cast<TensorFieldValue *>(fvalue)) = std::move(val);
- } else {
- throw DocBuilder::Error(vespalib::make_string(
- "Field '%s' not compatible",
- schemaField.getName().c_str()));
- }
-}
-
-void
-insertPosition(const Schema::Field & sfield,
- document::FieldValue * fvalue, int32_t xpos, int32_t ypos)
-{
- assert(*fvalue->getDataType() == *DataType::LONG);
- assert(sfield.getDataType() == schema::DataType::INT64);
- (void) sfield;
- int64_t zpos = ZCurve::encode(xpos, ypos);
- document::LongFieldValue *zvalue =
- dynamic_cast<LongFieldValue *>(fvalue);
- zvalue->setValue(zpos);
-}
-
-}
-
-namespace docbuilderkludge
-{
-
-namespace linguistics
-{
-
-const vespalib::string SPANTREE_NAME("linguistics");
-
-enum TokenType {
- UNKNOWN = 0,
- SPACE = 1,
- PUNCTUATION = 2,
- SYMBOL = 3,
- ALPHABETIC = 4,
- NUMERIC = 5,
- MARKER = 6
-};
-
-}
-
-}
-
-using namespace docbuilderkludge;
-
-namespace {
-
-Annotation
-makeTokenType(linguistics::TokenType type)
-{
- return Annotation(*AnnotationType::TOKEN_TYPE, std::make_unique<IntFieldValue>(type));
-}
-
-}
-
-namespace search::index {
-
-VESPA_IMPLEMENT_EXCEPTION(DocBuilderError, vespalib::Exception);
-
-DocBuilder::FieldHandle::FieldHandle(const document::Field & dfield, const Schema::Field & field) :
- _sfield(field),
- _value(),
- _element()
-{
- _value = dfield.createValue();
-}
-
-DocBuilder::CollectionFieldHandle::CollectionFieldHandle(const document::Field & dfield, const Schema::Field & field) :
- FieldHandle(dfield, field),
- _elementWeight(1)
-{
-}
-
-void
-DocBuilder::CollectionFieldHandle::startElement(int32_t weight)
-{
- assert(!_element);
- _elementWeight = weight;
- const CollectionFieldValue * value = dynamic_cast<CollectionFieldValue *>(_value.get());
- _element = value->createNested();
-}
-
-void
-DocBuilder::CollectionFieldHandle::endElement()
-{
- if (_sfield.getCollectionType() == CollectionType::ARRAY) {
- onEndElement();
- ArrayFieldValue * value = dynamic_cast<ArrayFieldValue *>(_value.get());
- value->add(*_element);
- } else if (_sfield.getCollectionType() == CollectionType::WEIGHTEDSET) {
- onEndElement();
- WeightedSetFieldValue * value = dynamic_cast<WeightedSetFieldValue *>(_value.get());
- value->add(*_element, _elementWeight);
- } else {
- throw Error(vespalib::make_string("Field '%s' not compatible", _sfield.getName().c_str()));
- }
- _element.reset();
-}
-
-DocBuilder::IndexFieldHandle::IndexFieldHandle(const FixedTypeRepo & repo, const document::Field & dfield, const Schema::Field & sfield)
- : CollectionFieldHandle(dfield, sfield),
- _str(),
- _strSymbols(0u),
- _spanList(nullptr),
- _spanTree(),
- _lastSpan(nullptr),
- _spanStart(0u),
- _autoAnnotate(true),
- _autoSpace(true),
- _skipAutoSpace(true),
- _uriField(false),
- _subField(),
- _repo(repo)
-{
- _str.reserve(1023);
-
- if (_sfield.getCollectionType() == CollectionType::SINGLE) {
- if (*_value->getDataType() == document::UrlDataType::getInstance()) {
- _uriField = true;
- }
- } else {
- const CollectionFieldValue * value = dynamic_cast<CollectionFieldValue *>(_value.get());
- if (value->getNestedType() == document::UrlDataType::getInstance()) {
- _uriField = true;
- }
- }
- startAnnotate();
-}
-
-void
-DocBuilder::IndexFieldHandle::append(const vespalib::string &val)
-{
- _strSymbols += val.size();
- _str += val;
-}
-
-void
-DocBuilder::IndexFieldHandle::addStr(const vespalib::string &val)
-{
- assert(_spanTree);
- if (val.empty()) {
- return;
- }
- if (!_skipAutoSpace && _autoSpace) {
- addSpace();
- }
- _skipAutoSpace = false;
- _spanStart = _strSymbols;
- append(val);
- if (_autoAnnotate) {
- addSpan();
- addTermAnnotation();
- if (val[0] >= '0' && val[0] <= '9') {
- addNumericTokenAnnotation();
- } else {
- addAlphabeticTokenAnnotation();
- }
- }
-}
-
-void
-DocBuilder::IndexFieldHandle::addSpace()
-{
- addNoWordStr(" ");
-}
-
-void
-DocBuilder::IndexFieldHandle::addNoWordStr(const vespalib::string &val)
-{
- assert(_spanTree);
- if (val.empty()) {
- return;
- }
- _spanStart = _strSymbols;
- append(val);
- if (_autoAnnotate) {
- addSpan();
- if (val[0] == ' ' || val[0] == '\t') {
- addSpaceTokenAnnotation();
- } else if (val[0] >= '0' && val[0] <= '9') {
- addNumericTokenAnnotation();
- } else {
- addAlphabeticTokenAnnotation();
- }
-
- }
- _skipAutoSpace = true;
-}
-
-void
-DocBuilder::IndexFieldHandle::addTokenizedString(const vespalib::string &val,
- bool urlMode)
-{
- Utf8Reader r(val);
- vespalib::string sbuf;
- Utf8Writer w(sbuf);
- uint32_t c = 0u;
- bool oldWord = false;
- assert(_uriField == urlMode);
- assert(_uriField != _subField.empty());
-
- while (r.hasMore()) {
- c = r.getChar();
- bool newWord = Fast_UnicodeUtil::IsWordChar(c) ||
- (urlMode && (c == '-' || c == '_'));
- if (oldWord != newWord) {
- if (!sbuf.empty()) {
- if (oldWord) {
- addStr(sbuf);
- } else {
- addNoWordStr(sbuf);
- }
- sbuf.clear();
- }
- oldWord = newWord;
- }
- w.putChar(c);
- }
- if (!sbuf.empty()) {
- if (oldWord) {
- addStr(sbuf);
- } else {
- addNoWordStr(sbuf);
- }
- }
-}
-
-void
-DocBuilder::IndexFieldHandle::addSpan(size_t start, size_t len)
-{
- const SpanNode &span = _spanList->add(std::make_unique<Span>(start, len));
- _lastSpan = &span;
-}
-
-void
-DocBuilder::IndexFieldHandle::addSpan()
-{
- size_t endPos = _strSymbols;
- assert(endPos > _spanStart);
- addSpan(_spanStart, endPos - _spanStart);
- _spanStart = endPos;
-}
-
-void
-DocBuilder::IndexFieldHandle::addSpaceTokenAnnotation()
-{
- assert(_spanTree);
- assert(_lastSpan != nullptr);
- _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::SPACE));
-}
-
-void
-DocBuilder::IndexFieldHandle::addNumericTokenAnnotation()
-{
- assert(_spanTree);
- assert(_lastSpan != nullptr);
- _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::NUMERIC));
-}
-
-void
-DocBuilder::IndexFieldHandle::addAlphabeticTokenAnnotation()
-{
- assert(_spanTree);
- assert(_lastSpan != nullptr);
- _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::ALPHABETIC));
-}
-
-void
-DocBuilder::IndexFieldHandle::addTermAnnotation()
-{
- assert(_spanTree);
- assert(_lastSpan != nullptr);
- _spanTree->annotate(*_lastSpan, *AnnotationType::TERM);
-}
-
-void
-DocBuilder::IndexFieldHandle::addTermAnnotation(const vespalib::string &val)
-{
- assert(_spanTree);
- assert(_lastSpan != nullptr);
- _spanTree->annotate(*_lastSpan,
- Annotation(*AnnotationType::TERM,
- std::make_unique<StringFieldValue>(val)));
-}
-
-void
-DocBuilder::IndexFieldHandle::onEndElement()
-{
- // Flush data for index field.
- assert(_subField.empty());
- if (_uriField) {
- return;
- }
- StringFieldValue * value;
- if (_sfield.getCollectionType() != CollectionType::SINGLE) {
- value = dynamic_cast<StringFieldValue *>(_element.get());
- } else {
- value = dynamic_cast<StringFieldValue *>(_value.get());
- }
- value->setValue(_str);
- // Also drop all spans no annotation for now
- if (_spanTree->numAnnotations() > 0u) {
- StringFieldValue::SpanTrees trees;
- trees.emplace_back(std::move(_spanTree));
- value->setSpanTrees(trees, _repo);
- } else {
- _spanTree.reset();
- }
- _spanList = nullptr;
- _lastSpan = nullptr;
- _spanStart = 0u;
- _strSymbols = 0u;
- _str.clear();
- _skipAutoSpace = true;
- startAnnotate();
-}
-
-void
-DocBuilder::IndexFieldHandle::onEndField()
-{
- if (_sfield.getCollectionType() == CollectionType::SINGLE) {
- onEndElement();
- }
-}
-
-void
-DocBuilder::IndexFieldHandle::startAnnotate()
-{
- SpanList::UP span_list(new SpanList);
- _spanList = span_list.get();
- _spanTree.reset(new SpanTree(linguistics::SPANTREE_NAME, std::move(span_list)));
-}
-
-void
-DocBuilder::IndexFieldHandle::setAutoAnnotate(bool autoAnnotate)
-{
- _autoAnnotate = autoAnnotate;
-}
-
-void
-DocBuilder::IndexFieldHandle::setAutoSpace(bool autoSpace)
-{
- _autoSpace = autoSpace;
-}
-
-void
-DocBuilder::IndexFieldHandle::startSubField(const vespalib::string &subField)
-{
- assert(_subField.empty());
- assert(_uriField);
- _subField = subField;
-}
-
-void
-DocBuilder::IndexFieldHandle::endSubField()
-{
- assert(!_subField.empty());
- assert(_uriField);
- StructuredFieldValue *sValue;
- if (_sfield.getCollectionType() != CollectionType::SINGLE) {
- sValue = dynamic_cast<StructFieldValue *>(_element.get());
- } else {
- sValue = dynamic_cast<StructFieldValue *>(_value.get());
- }
- const Field &f = sValue->getField(_subField);
- FieldValue::UP fval(f.getDataType().createFieldValue());
- *fval = _str;
- StringFieldValue *value = dynamic_cast<StringFieldValue *>(fval.get());
- StringFieldValue::SpanTrees trees;
- trees.emplace_back(std::move(_spanTree));
- value->setSpanTrees(trees, _repo);
- sValue->setValue(f, *fval);
- _spanList = nullptr;
- _lastSpan = nullptr;
- _spanStart = 0u;
- _strSymbols = 0u;
- _str.clear();
- _skipAutoSpace = true;
- startAnnotate();
- _subField.clear();
-}
-
-DocBuilder::AttributeFieldHandle::
-AttributeFieldHandle(const document::Field &dfield,
- const Schema::Field &sfield)
- : CollectionFieldHandle(dfield, sfield)
-{
-}
-
-void
-DocBuilder::AttributeFieldHandle::addStr(const vespalib::string & val)
-{
- if (_element) {
- insertStr(_sfield, _element.get(), val);
- } else {
- insertStr(_sfield, _value.get(), val);
- }
-}
-
-void
-DocBuilder::AttributeFieldHandle::addInt(int64_t val)
-{
- if (_element) {
- insertInt(_sfield, _element.get(), val);
- } else {
- insertInt(_sfield, _value.get(), val);
- }
-}
-
-void
-DocBuilder::AttributeFieldHandle::addFloat(double val)
-{
- if (_element) {
- insertFloat(_sfield, _element.get(), val);
- } else {
- insertFloat(_sfield, _value.get(), val);
- }
-}
-
-void
-DocBuilder::AttributeFieldHandle::addPredicate(
- std::unique_ptr<vespalib::Slime> val)
-{
- if (_element) {
- insertPredicate(_sfield, _element.get(), std::move(val));
- } else {
- insertPredicate(_sfield, _value.get(), std::move(val));
- }
-}
-
-void
-DocBuilder::AttributeFieldHandle::addTensor(
- std::unique_ptr<vespalib::eval::Value> val)
-{
- if (_element) {
- insertTensor(_sfield, _element.get(), std::move(val));
- } else {
- insertTensor(_sfield, _value.get(), std::move(val));
- }
-}
-
-void
-DocBuilder::AttributeFieldHandle::addPosition(int32_t xpos, int32_t ypos)
-{
- if (_element) {
- insertPosition(_sfield, _element.get(), xpos, ypos);
- } else {
- insertPosition(_sfield, _value.get(), xpos, ypos);
- }
-}
-
-DocBuilder::DocumentHandle::DocumentHandle(document::Document &doc, const vespalib::string & docId)
- : _type(&doc.getType()),
- _doc(&doc),
- _fieldHandle(),
- _repo(*_doc->getRepo(), *_type)
-{
- (void) docId;
-}
-
-DocBuilder::DocumentHandle::~DocumentHandle() = default;
-
-void
-DocBuilder::DocumentHandle::startIndexField(const Schema::Field & sfield) {
- _fieldHandle.reset(new IndexFieldHandle(_repo, _type->getField(sfield.getName()), sfield));
-}
-void
-DocBuilder::DocumentHandle::startAttributeField(const Schema::Field & sfield) {
- _fieldHandle.reset(new AttributeFieldHandle(_type->getField(sfield.getName()), sfield));
-}
-
-void
-DocBuilder::DocumentHandle::endField() {
- _fieldHandle->onEndField();
- _doc->setValue(_type->getField(_fieldHandle->getField().getName()), *_fieldHandle->getValue());
- _fieldHandle.reset();
-}
-
-DocBuilder::DocBuilder(const Schema &schema)
- : _schema(schema),
- _doctypes_config(DocTypeBuilder(schema).makeConfig()),
- _repo(std::make_shared<DocumentTypeRepo>(_doctypes_config)),
- _docType(*_repo->getDocumentType("searchdocument")),
- _doc(),
- _handleDoc(),
- _currDoc()
-{
-}
-
-DocBuilder::~DocBuilder() = default;
-
-DocBuilder &
-DocBuilder::startDocument(const vespalib::string & docId)
-{
- _doc = std::make_unique<Document>(_docType, DocumentId(docId));
- _doc->setRepo(*_repo);
- _handleDoc = std::make_shared<DocumentHandle>(*_doc, docId);
- return *this;
-}
-
-document::Document::UP
-DocBuilder::endDocument()
-{
- _handleDoc->endDocument(_doc);
- return std::move(_doc);
-}
-
-DocBuilder &
-DocBuilder::startIndexField(const vespalib::string & name)
-{
- assert(!_handleDoc->getFieldHandle());
- uint32_t field_id = _schema.getIndexFieldId(name);
- assert(field_id != Schema::UNKNOWN_FIELD_ID);
- _handleDoc->startIndexField(_schema.getIndexField(field_id));
- _currDoc = _handleDoc.get();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::startAttributeField(const vespalib::string & name)
-{
- assert(!_handleDoc->getFieldHandle());
- uint32_t field_id = _schema.getIndexFieldId(name);
- assert(field_id == Schema::UNKNOWN_FIELD_ID);
- field_id = _schema.getAttributeFieldId(name);
- assert(field_id != Schema::UNKNOWN_FIELD_ID);
- _handleDoc->startAttributeField(_schema.getAttributeField(field_id));
- _currDoc = _handleDoc.get();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::endField()
-{
- assert(_currDoc != nullptr);
- _currDoc->endField();
- _currDoc = nullptr;
- return *this;
-}
-
-DocBuilder &
-DocBuilder::startElement(int32_t weight)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->startElement(weight);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::endElement()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->endElement();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addStr(const vespalib::string & str)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addStr(str);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addSpace()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addSpace();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addNoWordStr(const vespalib::string & str)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addNoWordStr(str);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addTokenizedString(const vespalib::string &str)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addTokenizedString(str, false);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addUrlTokenizedString(const vespalib::string &str)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addTokenizedString(str, true);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addInt(int64_t val)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addInt(val);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addFloat(double val)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addFloat(val);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addPredicate(std::unique_ptr<vespalib::Slime> val)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addPredicate(std::move(val));
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addTensor(std::unique_ptr<vespalib::eval::Value> val)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addTensor(std::move(val));
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addSpan(size_t start, size_t len)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addSpan(start, len);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addSpan()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addSpan();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addSpaceTokenAnnotation()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addSpaceTokenAnnotation();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addNumericTokenAnnotation()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addNumericTokenAnnotation();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addAlphabeticTokenAnnotation()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addAlphabeticTokenAnnotation();
- return *this;
-}
-
-DocBuilder&
-DocBuilder::addTermAnnotation()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addTermAnnotation();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addTermAnnotation(const vespalib::string &val)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addTermAnnotation(val);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addPosition(int32_t xpos, int32_t ypos)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addPosition(xpos, ypos);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::addRaw(const void *buf, size_t len)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->addRaw(buf, len);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::startSubField(const vespalib::string &subField)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->startSubField(subField);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::endSubField()
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->endSubField();
- return *this;
-}
-
-DocBuilder &
-DocBuilder::setAutoAnnotate(bool autoAnnotate)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->setAutoAnnotate(autoAnnotate);
- return *this;
-}
-
-DocBuilder &
-DocBuilder::setAutoSpace(bool autoSpace)
-{
- assert(_currDoc != nullptr);
- _currDoc->getFieldHandle()->setAutoSpace(autoSpace);
- return *this;
-}
-
-}
diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.h b/searchlib/src/vespa/searchlib/index/docbuilder.h
deleted file mode 100644
index a8a37b57070..00000000000
--- a/searchlib/src/vespa/searchlib/index/docbuilder.h
+++ /dev/null
@@ -1,282 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "doctypebuilder.h"
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <vespa/document/fieldvalue/fieldvalues.h>
-#include <vespa/document/annotation/annotation.h>
-#include <vespa/document/annotation/span.h>
-#include <vespa/document/annotation/spanlist.h>
-#include <vespa/document/annotation/spantree.h>
-#include <vespa/vespalib/util/exception.h>
-#include <vespa/vespalib/util/stringfmt.h>
-
-namespace vespalib::eval { struct Value; }
-
-namespace search::index {
-
-VESPA_DEFINE_EXCEPTION(DocBuilderError, vespalib::Exception);
-
-/**
- * Builder class used to generate a search document that corresponds
- * to an index schema.
- **/
-class DocBuilder {
-public:
- typedef DocBuilderError Error;
-
-private:
- /**
- * Base class for handling the construction of a field.
- **/
- class FieldHandle {
- public:
- typedef std::shared_ptr<FieldHandle> SP;
- protected:
- const Schema::Field & _sfield;
- document::FieldValue::UP _value;
- document::FieldValue::UP _element;
- public:
- FieldHandle(const document::Field & dfield, const Schema::Field & field);
- virtual ~FieldHandle() {}
- virtual void startElement(int32_t weight) { (void) weight; throw Error("Function not supported"); }
- virtual void endElement() { throw Error("Function not supported"); }
- virtual void addStr(const vespalib::string & val) { (void) val; throw Error("Function not supported"); }
-
- virtual void addSpace() {
- throw Error("Function not supported");
- }
-
- virtual void addNoWordStr(const vespalib::string & val) {
- (void) val;
- throw Error("Function not supported");
- }
-
- virtual void addTokenizedString(const vespalib::string &val, bool urlMode) {
- (void) val;
- (void) urlMode;
- throw Error("Function not supported");
- }
-
- virtual void addSpan(size_t start, size_t len) {
- (void) start;
- (void) len;
- throw Error("Function not supported");
- }
-
- virtual void addSpan() {
- throw Error("Function not supported");
- }
-
- virtual void addSpaceTokenAnnotation() {
- throw Error("Function not supported");
- }
-
- virtual void addNumericTokenAnnotation() {
- throw Error("Function not supported");
- }
-
- virtual void addAlphabeticTokenAnnotation() {
- throw Error("Function not supported");
- }
-
- virtual void addTermAnnotation() {
- throw Error("Function not supported");
- }
-
- virtual void addTermAnnotation(const vespalib::string &val) {
- (void) val;
- throw Error("Function not supported");
- }
-
- virtual void addInt(int64_t val) { (void) val; throw Error("Function not supported"); }
- virtual void addFloat(double val) { (void) val; throw Error("Function not supported"); }
- virtual void addPredicate(std::unique_ptr<vespalib::Slime>) {
- throw Error("Function not supported");
- }
- virtual void addTensor(std::unique_ptr<vespalib::eval::Value>) {
- throw Error("Function not supported");
- }
- const document::FieldValue::UP & getValue() const { return _value; }
- const Schema::Field & getField() const { return _sfield; }
-
- virtual void onEndElement() {}
- virtual void onEndField() {}
-
- virtual void setAutoAnnotate(bool autoAnnotate) {
- (void) autoAnnotate;
- throw Error("Function not supported");
- }
-
- virtual void setAutoSpace(bool autoSpace) {
- (void) autoSpace;
- throw Error("Function not supported");
- }
-
- virtual void addPosition(int32_t xpos, int32_t ypos) {
- (void) xpos;
- (void) ypos;
- throw Error("Function not supported");
- }
-
- virtual void addRaw(const void *buf, size_t len) {
- (void) buf;
- (void) len;
- throw Error("Function not supported");
- }
-
- virtual void startSubField(const vespalib::string &subField) {
- (void) subField;
- throw Error("Function not supported");
- }
-
- virtual void endSubField() {
- throw Error("Function not supported");
- }
- };
-
- /**
- * Class that can handle multi value fields.
- **/
- class CollectionFieldHandle : public FieldHandle {
- private:
- int32_t _elementWeight;
- public:
- CollectionFieldHandle(const document::Field & dfield, const Schema::Field & sfield);
- void startElement(int32_t weight) override;
- void endElement() override;
- };
-
- /**
- * Class for handling the construction of the content of an index field.
- **/
- class IndexFieldHandle : public CollectionFieldHandle {
- vespalib::string _str; // adjusted as word comes along
- size_t _strSymbols; // symbols in string, assuming UTF8
- document::SpanList *_spanList; // owned by _spanTree
- document::SpanTree::UP _spanTree;
- const document::SpanNode *_lastSpan;
- size_t _spanStart; // start of span
- bool _autoAnnotate; // Add annotation when adding strings
- bool _autoSpace; // Add space before strings
- bool _skipAutoSpace; // one shot skip of adding space
- bool _uriField; // URI handling (special struct case)
- vespalib::string _subField;
- const document::FixedTypeRepo & _repo;
-
- void append(const vespalib::string &val);
-
- public:
- IndexFieldHandle(const document::FixedTypeRepo & repo,
- const document::Field &dfield,
- const Schema::Field &sfield);
-
- void addStr(const vespalib::string & val) override;
- void addSpace() override;
- void addNoWordStr(const vespalib::string & val) override;
- void addTokenizedString(const vespalib::string &val, bool urlMode) override;
- void addSpan(size_t start, size_t len) override;
- void addSpan() override;
- void addSpaceTokenAnnotation() override;
- void addNumericTokenAnnotation() override;
- void addAlphabeticTokenAnnotation() override;
- void addTermAnnotation() override;
- void addTermAnnotation(const vespalib::string &val) override;
- void onEndElement() override;
- void onEndField() override;
- void startAnnotate();
- void setAutoAnnotate(bool autoAnnotate) override;
- void setAutoSpace(bool autoSpace) override;
- void startSubField(const vespalib::string &subField) override;
- void endSubField() override;
- };
-
- /**
- * Class for handling the construction of the content of an attribute field.
- **/
- class AttributeFieldHandle : public CollectionFieldHandle {
- public:
- AttributeFieldHandle(const document::Field & dfield, const Schema::Field & sfield);
- void addStr(const vespalib::string & val) override;
- void addInt(int64_t val) override;
- void addFloat(double val) override;
- void addPredicate(std::unique_ptr<vespalib::Slime> val) override;
- void addTensor(std::unique_ptr<vespalib::eval::Value> val) override;
- void addPosition(int32_t xpos, int32_t ypos) override;
- };
-
- /**
- * Class for handling the construction of a document (set of fields).
- **/
- class DocumentHandle {
- public:
- typedef std::shared_ptr<DocumentHandle> SP;
- private:
- const document::DocumentType * _type;
- document::Document *const _doc;
- FieldHandle::SP _fieldHandle;
- document::FixedTypeRepo _repo;
- public:
- DocumentHandle(document::Document &doc, const vespalib::string & docId);
- ~DocumentHandle();
- const FieldHandle::SP & getFieldHandle() const { return _fieldHandle; }
- void startIndexField(const Schema::Field & sfield);
- void startAttributeField(const Schema::Field & sfield);
- void endField();
- void endDocument(const document::Document::UP & doc) {
- (void) doc;
- }
- };
-
- const Schema & _schema;
- document::config::DocumenttypesConfig _doctypes_config;
- std::shared_ptr<const document::DocumentTypeRepo> _repo;
- const document::DocumentType &_docType;
- document::Document::UP _doc; // the document we are about to generate
-
- DocumentHandle::SP _handleDoc; // handle for all fields
- DocumentHandle * _currDoc; // the current document handle
-
-public:
- DocBuilder(const Schema & schema);
- ~DocBuilder();
-
- DocBuilder & startDocument(const vespalib::string & docId);
- document::Document::UP endDocument();
-
- DocBuilder & startIndexField(const vespalib::string & name);
- DocBuilder & startAttributeField(const vespalib::string & name);
- DocBuilder & endField();
- DocBuilder & startElement(int32_t weight = 1);
- DocBuilder & endElement();
- DocBuilder & addStr(const vespalib::string & val);
- DocBuilder & addSpace();
- DocBuilder & addNoWordStr(const vespalib::string & val);
- DocBuilder & addInt(int64_t val);
- DocBuilder & addFloat(double val);
- DocBuilder & addPredicate(std::unique_ptr<vespalib::Slime> val);
- DocBuilder & addTensor(std::unique_ptr<vespalib::eval::Value> val);
- DocBuilder &addTokenizedString(const vespalib::string &val);
- DocBuilder &addUrlTokenizedString(const vespalib::string &val);
- DocBuilder &addSpan(size_t start, size_t len);
- DocBuilder &addSpan();
- DocBuilder &addSpaceTokenAnnotation();
- DocBuilder &addNumericTokenAnnotation();
- DocBuilder &addAlphabeticTokenAnnotation();
- DocBuilder &addTermAnnotation();
- DocBuilder &addTermAnnotation(const vespalib::string &val);
- DocBuilder &setAutoAnnotate(bool autoAnnotate);
- DocBuilder &setAutoSpace(bool autoSpace);
- DocBuilder &addPosition(int32_t xpos, int32_t ypos);
- DocBuilder &addRaw(const void *buf, size_t len);
- DocBuilder &startSubField(const vespalib::string &subField);
- DocBuilder &endSubField();
- static bool hasAnnotations() { return true; }
-
- const document::DocumentType &getDocumentType() const { return _docType; }
- const std::shared_ptr<const document::DocumentTypeRepo> &getDocumentTypeRepo() const { return _repo; }
- document::config::DocumenttypesConfig getDocumenttypesConfig() const { return _doctypes_config; }
-};
-
-}
diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
deleted file mode 100644
index 5f655419471..00000000000
--- a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "doctypebuilder.h"
-#include <vespa/document/datatype/urldatatype.h>
-#include <vespa/document/datatype/tensor_data_type.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/document/repo/configbuilder.h>
-#include <set>
-
-using namespace document;
-
-namespace search::index {
-namespace {
-
-DataType::Type convert(Schema::DataType type) {
- switch (type) {
- case schema::DataType::BOOL:
- case schema::DataType::UINT2:
- case schema::DataType::UINT4:
- case schema::DataType::INT8:
- return DataType::T_BYTE;
- case schema::DataType::INT16:
- return DataType::T_SHORT;
- case schema::DataType::INT32:
- return DataType::T_INT;
- case schema::DataType::INT64:
- return DataType::T_LONG;
- case schema::DataType::FLOAT:
- return DataType::T_FLOAT;
- case schema::DataType::DOUBLE:
- return DataType::T_DOUBLE;
- case schema::DataType::STRING:
- return DataType::T_STRING;
- case schema::DataType::RAW:
- return DataType::T_RAW;
- case schema::DataType::BOOLEANTREE:
- return DataType::T_PREDICATE;
- case schema::DataType::TENSOR:
- return DataType::T_TENSOR;
- default:
- break;
- }
- assert(!"Unknown datatype in schema");
- return DataType::MAX;
-}
-
-void
-insertStructType(document::config::DocumenttypesConfig::Documenttype & cfg, const StructDataType & structType)
-{
- typedef document::config::DocumenttypesConfig DTC;
- DTC::Documenttype::Datatype::Sstruct cfgStruct;
- cfgStruct.name = structType.getName();
- Field::Set fieldSet = structType.getFieldSet();
- for (const Field * field : fieldSet) {
- DTC::Documenttype::Datatype::Sstruct::Field sField;
- sField.name = field->getName();
- sField.datatype = field->getDataType().getId();
- sField.id = field->getId();
- cfgStruct.field.push_back(sField);
- }
- cfg.datatype.push_back(DTC::Documenttype::Datatype());
- cfg.datatype.back().sstruct = cfgStruct;
- cfg.datatype.back().id = structType.getId();
-}
-
-using namespace document::config_builder;
-
-TypeOrId makeCollection(TypeOrId datatype, Schema::CollectionType collection_type) {
- switch (collection_type) {
- case schema::CollectionType::ARRAY:
- return Array(datatype);
- case schema::CollectionType::WEIGHTEDSET:
- // TODO: consider using array of struct<primitive,int32> to keep order
- return Wset(datatype);
- default:
- return datatype;
- }
-}
-
-struct TypeCache {
- std::map<std::pair<int, Schema::CollectionType>, TypeOrId> types;
-
- TypeOrId getType(TypeOrId datatype, Schema::CollectionType c_type) {
- TypeOrId type = makeCollection(datatype, c_type);
- std::pair<int, Schema::CollectionType> key = std::make_pair(datatype.id, c_type);
- if (types.find(key) == types.end()) {
- types.insert(std::make_pair(key, type));
- }
- return types.find(key)->second;
- }
-};
-
-}
-
-DocTypeBuilder::DocTypeBuilder(const Schema &schema)
- : _schema(schema),
- _iFields()
-{
- _iFields.setup(schema);
-}
-
-document::config::DocumenttypesConfig DocTypeBuilder::makeConfig() const {
- using namespace document::config_builder;
- TypeCache type_cache;
-
- typedef std::set<vespalib::string> UsedFields;
- UsedFields usedFields;
-
- Struct header_struct("searchdocument.header");
- header_struct.setId(-1505212454);
-
- for (size_t i = 0; i < _iFields._textFields.size(); ++i) {
- const Schema::IndexField &field =
- _schema.getIndexField(_iFields._textFields[i]);
-
- // only handles string fields for now
- assert(field.getDataType() == schema::DataType::STRING);
- header_struct.addField(field.getName(), type_cache.getType(
- DataType::T_STRING, field.getCollectionType()));
- usedFields.insert(field.getName());
- }
-
- const int32_t uri_type = document::UrlDataType::getInstance().getId();
- for (size_t i = 0; i < _iFields._uriFields.size(); ++i) {
- const Schema::IndexField &field =
- _schema.getIndexField(_iFields._uriFields[i]._all);
-
- // only handles string fields for now
- assert(field.getDataType() == schema::DataType::STRING);
- header_struct.addField(field.getName(), type_cache.getType(
- uri_type, field.getCollectionType()));
- usedFields.insert(field.getName());
- }
-
- for (uint32_t i = 0; i < _schema.getNumAttributeFields(); ++i) {
- const Schema::AttributeField &field = _schema.getAttributeField(i);
- UsedFields::const_iterator usf = usedFields.find(field.getName());
- if (usf != usedFields.end()) {
- continue; // taken as index field
- }
- auto type_id = convert(field.getDataType());
- if (type_id == DataType::T_TENSOR) {
- header_struct.addTensorField(field.getName(), field.get_tensor_spec());
- } else {
- header_struct.addField(field.getName(), type_cache.getType(
- type_id, field.getCollectionType()));
- }
- usedFields.insert(field.getName());
- }
-
- DocumenttypesConfigBuilderHelper builder;
- builder.document(-645763131, "searchdocument",
- header_struct, Struct("searchdocument.body"));
- return builder.config();
-}
-
-document::config::DocumenttypesConfig
-DocTypeBuilder::makeConfig(const DocumentType &docType)
-{
- typedef document::config::DocumenttypesConfigBuilder DTC;
- DTC cfg;
- { // document type
- DTC::Documenttype dtype;
- dtype.id = docType.getId();
- dtype.name = docType.getName();
- // TODO(vekterli): remove header/body config
- dtype.headerstruct = docType.getFieldsType().getId();
- dtype.bodystruct = docType.getFieldsType().getId();
- cfg.documenttype.push_back(dtype);
- }
- insertStructType(cfg.documenttype[0], docType.getFieldsType());
- return cfg;
-}
-
-}
diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.h b/searchlib/src/vespa/searchlib/index/doctypebuilder.h
deleted file mode 100644
index 4db0ba5b0e3..00000000000
--- a/searchlib/src/vespa/searchlib/index/doctypebuilder.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "schema_index_fields.h"
-#include <vespa/document/config/config-documenttypes.h>
-#include <vespa/document/fieldvalue/fieldvalues.h>
-#include <vespa/vespalib/util/exception.h>
-#include <vespa/vespalib/util/stringfmt.h>
-
-namespace search::index {
-
-/**
- * Builder for the indexingdocument document type based on an index schema.
- **/
-class DocTypeBuilder {
- const Schema &_schema;
- SchemaIndexFields _iFields;
-
-public:
- DocTypeBuilder(const Schema & schema);
- document::config::DocumenttypesConfig makeConfig() const;
-
- static document::config::DocumenttypesConfig
- makeConfig(const document::DocumentType &docType);
-};
-
-}
diff --git a/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp b/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp
index 6515d896917..fabe630432f 100644
--- a/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp
+++ b/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp
@@ -33,6 +33,11 @@ get_document_types_config(EmptyDocBuilder::AddFieldsType add_fields)
}
+/*
+ * Default constructor: delegates to the AddFieldsType constructor with a
+ * callback that accepts the struct it is given and does nothing with it.
+ */
+EmptyDocBuilder::EmptyDocBuilder()
+    : EmptyDocBuilder(AddFieldsType([](auto&) noexcept {}))
+{
+}
+
EmptyDocBuilder::EmptyDocBuilder(AddFieldsType add_fields)
: _document_types_config(std::make_shared<const DocumenttypesConfig>(get_document_types_config(add_fields))),
_repo(DocumentTypeRepoFactory::make(*_document_types_config)),
diff --git a/searchlib/src/vespa/searchlib/index/empty_doc_builder.h b/searchlib/src/vespa/searchlib/index/empty_doc_builder.h
index 7e734af4e95..18b6543bea1 100644
--- a/searchlib/src/vespa/searchlib/index/empty_doc_builder.h
+++ b/searchlib/src/vespa/searchlib/index/empty_doc_builder.h
@@ -28,6 +28,7 @@ class EmptyDocBuilder {
const document::DocumentType* _document_type;
public:
using AddFieldsType = std::function<void(document::config_builder::Struct&)>;
+ EmptyDocBuilder();
explicit EmptyDocBuilder(AddFieldsType add_fields);
~EmptyDocBuilder();
const document::DocumentTypeRepo& get_repo() const noexcept { return *_repo; }
diff --git a/searchlib/src/vespa/searchlib/index/string_field_builder.cpp b/searchlib/src/vespa/searchlib/index/string_field_builder.cpp
new file mode 100644
index 00000000000..3212a021535
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/string_field_builder.cpp
@@ -0,0 +1,140 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "string_field_builder.h"
+#include "empty_doc_builder.h"
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/fastlib/text/unicodeutil.h>
+#include <vespa/vespalib/text/utf8.h>
+
+#include <cassert>
+
+using document::Annotation;
+using document::AnnotationType;
+using document::FixedTypeRepo;
+using document::StringFieldValue;
+using document::Span;
+using document::SpanList;
+using document::SpanNode;
+using document::SpanTree;
+using vespalib::Utf8Reader;
+using vespalib::Utf8Writer;
+
+namespace search::index {
+
+namespace {
+
+// Name passed to every SpanTree created by StringFieldBuilder::start_annotate().
+const vespalib::string SPANTREE_NAME("linguistics");
+
+}
+
+/*
+ * Create a builder with no text, no span tree and url mode switched off.
+ * The fixed type repo is constructed from the repo and the document type
+ * exposed by empty_doc_builder; build() later hands it to setSpanTrees().
+ * NOTE(review): presumably empty_doc_builder has to outlive this builder,
+ * confirm against document::FixedTypeRepo.
+ */
+StringFieldBuilder::StringFieldBuilder(const EmptyDocBuilder& empty_doc_builder)
+ : _value(),
+ _span_start(0u),
+ _span_list(nullptr),
+ _span_tree(),
+ _last_span(nullptr),
+ _url_mode(false),
+ _repo(empty_doc_builder.get_repo(), empty_doc_builder.get_document_type())
+{
+}
+
+// Defined here rather than in the header, where document::SpanTree is only forward declared.
+StringFieldBuilder::~StringFieldBuilder() = default;
+
+/*
+ * Start a new span tree named SPANTREE_NAME around an empty span list.
+ * _span_list keeps a non-owning pointer to that list so add_span() can
+ * still append to it once the tree has taken ownership.
+ */
+void
+StringFieldBuilder::start_annotate()
+{
+    auto fresh_list = std::make_unique<SpanList>();
+    _span_list = fresh_list.get(); // take the raw pointer before the list is moved away
+    _span_tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(fresh_list));
+}
+
+/*
+ * Append a span covering the text from _span_start up to the current end
+ * of _value, remember it as the last span, and move _span_start to the end
+ * of the text.  The covered range must not be empty (asserted).
+ */
+void
+StringFieldBuilder::add_span()
+{
+    const size_t text_end = _value.size();
+    assert(text_end > _span_start);
+    auto covering = std::make_unique<Span>(_span_start, text_end - _span_start);
+    _last_span = &_span_list->add(std::move(covering));
+    _span_start = text_end;
+}
+
+/*
+ * Append val to the text as a single token and record a span for it.
+ * An empty val is ignored.  The span tree is created on the first
+ * non-empty token.  When is_word is true the new span is additionally
+ * annotated with AnnotationType::TERM.
+ */
+StringFieldBuilder&
+StringFieldBuilder::token(const vespalib::string& val, bool is_word)
+{
+    if (!val.empty()) {
+        if (!_span_tree) {
+            start_annotate();
+        }
+        // The span for this token starts where the existing text ends.
+        _span_start = _value.size();
+        _value.append(val);
+        add_span();
+        if (is_word) {
+            _span_tree->annotate(*_last_span, *AnnotationType::TERM);
+        }
+    }
+    return *this;
+}
+
+/*
+ * Add one more TERM annotation, carrying val as its string value, to the
+ * most recently added span.  A span must already exist (asserted).
+ */
+StringFieldBuilder&
+StringFieldBuilder::alt_word(const vespalib::string& val)
+{
+    assert(_last_span != nullptr);
+    auto alt_value = std::make_unique<StringFieldValue>(val);
+    Annotation alt_term(*AnnotationType::TERM, std::move(alt_value));
+    _span_tree->annotate(*_last_span, std::move(alt_term));
+    return *this;
+}
+
+/*
+ * Split val into alternating runs of word and non-word characters and pass
+ * each run to token().  A character counts as part of a word when
+ * Fast_UnicodeUtil::IsWordChar() accepts it or, in url mode, when it is
+ * '-' or '_'.  Word runs are passed with is_word == true, all other runs
+ * with is_word == false.
+ */
+StringFieldBuilder&
+StringFieldBuilder::tokenize(const vespalib::string& val)
+{
+    vespalib::string pending;   // characters of the run collected so far
+    Utf8Writer out(pending);
+    bool in_word = false;       // kind of run currently held in pending
+
+    // Emit the collected run, if any, and start collecting a new one.
+    auto flush = [&]() {
+        if (!pending.empty()) {
+            token(pending, in_word);
+            pending.clear();
+        }
+    };
+
+    for (Utf8Reader in(val); in.hasMore(); ) {
+        const uint32_t ch = in.getChar();
+        const bool ch_in_word = Fast_UnicodeUtil::IsWordChar(ch) ||
+                                (_url_mode && (ch == '-' || ch == '_'));
+        if (ch_in_word != in_word) {
+            // The kind of character changed: the previous run is complete.
+            flush();
+            in_word = ch_in_word;
+        }
+        out.putChar(ch);
+    }
+    flush();
+    return *this;
+}
+
+
+/*
+ * Return a StringFieldValue holding the accumulated text.  The span tree
+ * is attached only if it exists and carries at least one annotation; a
+ * tree without annotations is discarded.  Afterwards the text and all span
+ * state are cleared; the url mode setting is left as it is.
+ */
+document::StringFieldValue
+StringFieldBuilder::build()
+{
+    StringFieldValue result(_value);
+    const bool has_annotations = _span_tree && (_span_tree->numAnnotations() != 0u);
+    if (has_annotations) {
+        StringFieldValue::SpanTrees trees;
+        trees.emplace_back(std::move(_span_tree));
+        result.setSpanTrees(trees, _repo);
+    } else {
+        // For now, spans that carry no annotation are dropped together with their tree.
+        _span_tree.reset();
+    }
+    // Clear the text and span bookkeeping.
+    _value.clear();
+    _span_start = 0u;
+    _span_list = nullptr;
+    _last_span = nullptr;
+    return result;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/index/string_field_builder.h b/searchlib/src/vespa/searchlib/index/string_field_builder.h
new file mode 100644
index 00000000000..1987cbbcf74
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/string_field_builder.h
@@ -0,0 +1,45 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+#include <memory>
+
+namespace document {
+class SpanList;
+struct SpanNode;
+class SpanTree;
+class StringFieldValue;
+}
+
+namespace search::index {
+
+class EmptyDocBuilder;
+
+/*
+ * Helper class to build an annotated string field value.
+ *
+ * Text is appended one token at a time (token(), word(), space()) or by
+ * splitting a whole string into word / non-word runs (tokenize()).  Every
+ * non-empty token gets a span in a span tree; word tokens are also
+ * annotated as terms, and alt_word() adds a further term annotation to the
+ * latest span.  build() returns the resulting StringFieldValue and clears
+ * the text and span state; the url mode setting is kept.
+ */
+class StringFieldBuilder {
+    vespalib::string _value;                        // text accumulated so far
+    size_t _span_start;                             // offset in _value where the next span begins
+    document::SpanList* _span_list;                 // owned by _span_tree
+    std::unique_ptr<document::SpanTree> _span_tree; // created by the first non-empty token
+    const document::SpanNode* _last_span;           // most recently added span, target of alt_word()
+    bool _url_mode;                                 // tokenize() also treats '-' and '_' as word characters
+    const document::FixedTypeRepo _repo;            // passed to setSpanTrees() in build()
+    // Create a new span tree around an empty span list.
+    void start_annotate();
+    // Add a span covering the text appended since _span_start.
+    void add_span();
+public:
+    // explicit: an EmptyDocBuilder must not convert silently into a field builder.
+    explicit StringFieldBuilder(const EmptyDocBuilder& empty_doc_builder);
+    ~StringFieldBuilder();
+    // Turn url mode on or off; it is consulted by tokenize().
+    StringFieldBuilder& url_mode(bool url_mode_) noexcept { _url_mode = url_mode_; return *this; }
+    // Append val as one token; annotate it as a term when is_word is true. An empty val is ignored.
+    StringFieldBuilder& token(const vespalib::string& val, bool is_word);
+    // Append val as a token annotated as a term.
+    StringFieldBuilder& word(const vespalib::string& val) { return token(val, true); }
+    // Append a single space as a token without a term annotation.
+    StringFieldBuilder& space() { return token(" ", false); }
+    // Split val into word / non-word runs and append each run as a token.
+    StringFieldBuilder& tokenize(const vespalib::string& val);
+    // Add another term annotation, with val as its value, to the latest span.
+    StringFieldBuilder& alt_word(const vespalib::string& val);
+    // Return the built value and clear the text and span state.
+    document::StringFieldValue build();
+};
+
+}