summaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests')
-rw-r--r--searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp27
-rw-r--r--searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp8
-rw-r--r--searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp4
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp12
-rw-r--r--searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp16
-rw-r--r--searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp18
-rw-r--r--searchlib/src/tests/attribute/posting_store/posting_store_test.cpp8
-rw-r--r--searchlib/src/tests/attribute/postinglist/postinglist.cpp26
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp6
-rw-r--r--searchlib/src/tests/diskindex/fusion/fusion_test.cpp84
-rw-r--r--searchlib/src/tests/index/docbuilder/.gitignore5
-rw-r--r--searchlib/src/tests/index/docbuilder/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/index/docbuilder/docbuilder_test.cpp437
-rw-r--r--searchlib/src/tests/index/doctypebuilder/.gitignore5
-rw-r--r--searchlib/src/tests/index/doctypebuilder/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp74
-rw-r--r--searchlib/src/tests/index/string_field_builder/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp141
-rw-r--r--searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp92
-rw-r--r--searchlib/src/tests/memoryindex/field_index/field_index_test.cpp357
-rw-r--r--searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp135
-rw-r--r--searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp38
-rw-r--r--searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp243
-rw-r--r--searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp2
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp6
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp6
26 files changed, 672 insertions, 1103 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
index 7f0a88c9f86..3fa74b78d2a 100644
--- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
+++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
@@ -38,8 +38,7 @@ public:
generation_t getGen() const { return getCurrentGeneration(); }
uint32_t getRefCount(generation_t gen) const { return getGenerationRefCount(gen); }
void incGen() { incGeneration(); }
- void updateFirstUsedGen() { updateFirstUsedGeneration(); }
- generation_t getFirstUsedGen() const { return getFirstUsedGeneration(); }
+ generation_t oldest_used_gen() const { return get_oldest_used_generation(); }
};
@@ -49,35 +48,35 @@ TEST("Test attribute guards")
TestAttribute * v = static_cast<TestAttribute *> (vec.get());
EXPECT_EQUAL(v->getGen(), unsigned(0));
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
{
AttributeGuard g0(vec);
EXPECT_EQUAL(v->getGen(), unsigned(0));
EXPECT_EQUAL(v->getRefCount(0), unsigned(1));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
{
AttributeGuard g1(vec);
EXPECT_EQUAL(v->getGen(), unsigned(0));
EXPECT_EQUAL(v->getRefCount(0), unsigned(2));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
}
EXPECT_EQUAL(v->getRefCount(0), unsigned(1));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
}
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
v->incGen();
EXPECT_EQUAL(v->getGen(), unsigned(1));
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->getRefCount(1), unsigned(0));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1));
{
AttributeGuard g0(vec);
EXPECT_EQUAL(v->getGen(), unsigned(1));
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1));
{
v->incGen();
AttributeGuard g1(vec);
@@ -85,19 +84,19 @@ TEST("Test attribute guards")
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
EXPECT_EQUAL(v->getRefCount(2), unsigned(1));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1));
}
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
EXPECT_EQUAL(v->getRefCount(2), unsigned(0));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1));
}
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->getRefCount(1), unsigned(0));
EXPECT_EQUAL(v->getRefCount(2), unsigned(0));
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
- v->updateFirstUsedGeneration();
- EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2));
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1));
+ v->update_oldest_used_generation();
+ EXPECT_EQUAL(v->oldest_used_gen(), unsigned(2));
EXPECT_EQUAL(v->getGen(), unsigned(2));
}
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
index e27065f1c25..b89a3827cc2 100644
--- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
+++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
@@ -135,7 +135,7 @@ DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest()
_postings.clear(tree);
}
_postings.clearBuilder();
- _postings.clearHoldLists();
+ _postings.reclaim_all_memory();
inc_generation();
}
@@ -143,10 +143,10 @@ void
DocumentWeightOrFilterSearchTest::inc_generation()
{
_postings.freeze();
- _postings.transferHoldLists(_gens.getCurrentGeneration());
+ _postings.assign_generation(_gens.getCurrentGeneration());
_gens.incGeneration();
- _gens.updateFirstUsedGeneration();
- _postings.trimHoldLists(_gens.getFirstUsedGeneration());
+ _gens.update_oldest_used_generation();
+ _postings.reclaim_memory(_gens.get_oldest_used_generation());
}
TEST_F(DocumentWeightOrFilterSearchTest, daat_or)
diff --git a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
index 1d76473754f..9d717202551 100644
--- a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
+++ b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
@@ -147,9 +147,9 @@ TEST("requireThatComparatorWithTreeIsWorking")
EXPECT_EQUAL(101, exp);
t.clear(m);
m.freeze();
- m.transferHoldLists(g.getCurrentGeneration());
+ m.assign_generation(g.getCurrentGeneration());
g.incGeneration();
- m.trimHoldLists(g.getFirstUsedGeneration());
+ m.reclaim_memory(g.get_oldest_used_generation());
}
TEST("requireThatFoldedLessIsWorking")
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index 02ff01043b0..0542a253cc5 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -345,8 +345,8 @@ TEST(EnumStoreTest, test_hold_lists_and_generation)
// check readers again
checkReaders(ses, readers);
- ses.transfer_hold_lists(sesGen);
- ses.trim_hold_lists(sesGen + 1);
+ ses.assign_generation(sesGen);
+ ses.reclaim_memory(sesGen + 1);
}
void
@@ -357,8 +357,8 @@ dec_ref_count(NumericEnumStore& store, NumericEnumStore::Index idx)
updater.commit();
generation_t gen = 5;
- store.transfer_hold_lists(gen);
- store.trim_hold_lists(gen + 1);
+ store.assign_generation(gen);
+ store.reclaim_memory(gen + 1);
}
TEST(EnumStoreTest, address_space_usage_is_reported)
@@ -882,9 +882,9 @@ namespace {
void inc_generation(generation_t &gen, NumericEnumStore &store)
{
store.freeze_dictionary();
- store.transfer_hold_lists(gen);
+ store.assign_generation(gen);
++gen;
- store.trim_hold_lists(gen);
+ store.reclaim_memory(gen);
}
}
diff --git a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp
index b9f3c23213e..0d2ce048111 100644
--- a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp
+++ b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp
@@ -73,14 +73,14 @@ TEST_F("makeReadGuard(false) acquires guards on both target and reference attrib
EXPECT_EQUAL(2u, f.target_attr->getCurrentGeneration());
EXPECT_EQUAL(2u, f.reference_attr->getCurrentGeneration());
// Should still be holding guard for first generation of writes for both attributes
- EXPECT_EQUAL(1u, f.target_attr->getFirstUsedGeneration());
- EXPECT_EQUAL(1u, f.reference_attr->getFirstUsedGeneration());
+ EXPECT_EQUAL(1u, f.target_attr->get_oldest_used_generation());
+ EXPECT_EQUAL(1u, f.reference_attr->get_oldest_used_generation());
}
// Force a generation handler update
add_n_docs_with_undefined_values(*f.reference_attr, 1);
add_n_docs_with_undefined_values(*f.target_attr, 1);
- EXPECT_EQUAL(3u, f.target_attr->getFirstUsedGeneration());
- EXPECT_EQUAL(3u, f.reference_attr->getFirstUsedGeneration());
+ EXPECT_EQUAL(3u, f.target_attr->get_oldest_used_generation());
+ EXPECT_EQUAL(3u, f.reference_attr->get_oldest_used_generation());
}
TEST_F("makeReadGuard(true) acquires enum guard on target and regular guard on reference attribute", Fixture) {
@@ -95,15 +95,15 @@ TEST_F("makeReadGuard(true) acquires enum guard on target and regular guard on r
EXPECT_EQUAL(5u, f.target_attr->getCurrentGeneration());
EXPECT_EQUAL(2u, f.reference_attr->getCurrentGeneration());
- EXPECT_EQUAL(3u, f.target_attr->getFirstUsedGeneration());
- EXPECT_EQUAL(1u, f.reference_attr->getFirstUsedGeneration());
+ EXPECT_EQUAL(3u, f.target_attr->get_oldest_used_generation());
+ EXPECT_EQUAL(1u, f.reference_attr->get_oldest_used_generation());
EXPECT_TRUE(has_active_enum_guards(*f.target_attr));
}
// Force a generation handler update
add_n_docs_with_undefined_values(*f.reference_attr, 1);
add_n_docs_with_undefined_values(*f.target_attr, 1);
- EXPECT_EQUAL(7u, f.target_attr->getFirstUsedGeneration());
- EXPECT_EQUAL(3u, f.reference_attr->getFirstUsedGeneration());
+ EXPECT_EQUAL(7u, f.target_attr->get_oldest_used_generation());
+ EXPECT_EQUAL(3u, f.reference_attr->get_oldest_used_generation());
EXPECT_FALSE(has_active_enum_guards(*f.target_attr));
}
diff --git a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp
index 735ebcff6cf..8b8f4d2c4d4 100644
--- a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp
+++ b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp
@@ -41,11 +41,11 @@ class MyAttribute : public search::NotImplementedAttribute
_mvMapping.shrink(committedDocIdLimit);
setNumDocs(committedDocIdLimit);
}
- virtual void removeOldGenerations(generation_t firstUsed) override {
- _mvMapping.trimHoldLists(firstUsed);
+ virtual void reclaim_memory(generation_t oldest_used_gen) override {
+ _mvMapping.reclaim_memory(oldest_used_gen);
}
- virtual void onGenerationChange(generation_t generation) override {
- _mvMapping.transferHoldLists(generation - 1);
+ virtual void before_inc_generation(generation_t current_gen) override {
+ _mvMapping.assign_generation(current_gen);
}
public:
@@ -115,8 +115,8 @@ public:
ConstArrayRef act = get(docId);
EXPECT_EQ(exp, std::vector<EntryT>(act.cbegin(), act.cend()));
}
- void transferHoldLists(generation_t generation) { _mvMapping->transferHoldLists(generation); }
- void trimHoldLists(generation_t firstUsed) { _mvMapping->trimHoldLists(firstUsed); }
+ void assign_generation(generation_t current_gen) { _mvMapping->assign_generation(current_gen); }
+ void reclaim_memory(generation_t oldest_used_gen) { _mvMapping->reclaim_memory(oldest_used_gen); }
void addDocs(uint32_t numDocs) {
for (uint32_t i = 0; i < numDocs; ++i) {
uint32_t doc = 0;
@@ -245,12 +245,12 @@ TEST_F(IntMappingTest, test_that_old_value_is_not_overwritten_while_held)
auto old3 = get(3);
assertArray({5}, old3);
set(3, {7});
- transferHoldLists(10);
+ assign_generation(10);
assertArray({5}, old3);
assertGet(3, {7});
- trimHoldLists(10);
+ reclaim_memory(10);
assertArray({5}, old3);
- trimHoldLists(11);
+ reclaim_memory(11);
assertArray({0}, old3);
}
diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
index 36babec6a89..75e7faf0227 100644
--- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
+++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
@@ -64,11 +64,11 @@ protected:
{
_value_store.freeze_dictionary();
_store.freeze();
- _value_store.transfer_hold_lists(_gen_handler.getCurrentGeneration());
- _store.transferHoldLists(_gen_handler.getCurrentGeneration());
+ _value_store.assign_generation(_gen_handler.getCurrentGeneration());
+ _store.assign_generation(_gen_handler.getCurrentGeneration());
_gen_handler.incGeneration();
- _value_store.trim_hold_lists(_gen_handler.getFirstUsedGeneration());
- _store.trimHoldLists(_gen_handler.getFirstUsedGeneration());
+ _value_store.reclaim_memory(_gen_handler.get_oldest_used_generation());
+ _store.reclaim_memory(_gen_handler.get_oldest_used_generation());
}
EntryRef add_sequence(int start_key, int end_key)
diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
index 54efb3261c8..1eed3a015e1 100644
--- a/searchlib/src/tests/attribute/postinglist/postinglist.cpp
+++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
@@ -201,7 +201,7 @@ private:
PostingListNodeAllocator &postingsAlloc);
void
- removeOldGenerations(Tree &tree,
+ reclaim_memory(Tree &tree,
ValueHandle &valueHandle,
PostingList &postings,
PostingListNodeAllocator &postingsAlloc);
@@ -259,12 +259,12 @@ AttributePostingListTest::freeTree(bool verbose)
static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()));
_intNodeAlloc->freeze();
_intPostings->freeze();
- _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration());
+ _intNodeAlloc->assign_generation(_handler.getCurrentGeneration());
_intPostings->clearBuilder();
- _intPostings->transferHoldLists(_handler.getCurrentGeneration());
+ _intPostings->assign_generation(_handler.getCurrentGeneration());
_handler.incGeneration();
- _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration());
- _intPostings->trimHoldLists(_handler.getFirstUsedGeneration());
+ _intNodeAlloc->reclaim_memory(_handler.get_oldest_used_generation());
+ _intPostings->reclaim_memory(_handler.get_oldest_used_generation());
LOG(info,
"freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)",
static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()),
@@ -613,9 +613,9 @@ AttributePostingListTest::doCompactEnumStore(Tree &tree,
valueHandle.holdBuffer(*it);
}
generation_t generation = _handler.getCurrentGeneration();
- valueHandle.transferHoldLists(generation);
+ valueHandle.assign_generation(generation);
_handler.incGeneration();
- valueHandle.trimHoldLists(_handler.getFirstUsedGeneration());
+ valueHandle.reclaim_memory(_handler.get_oldest_used_generation());
LOG(info,
"doCompactEnumStore done");
@@ -658,22 +658,22 @@ bumpGeneration(Tree &tree,
(void) tree;
(void) valueHandle;
postingsAlloc.freeze();
- postingsAlloc.transferHoldLists(_handler.getCurrentGeneration());
- postings.transferHoldLists(_handler.getCurrentGeneration());
+ postingsAlloc.assign_generation(_handler.getCurrentGeneration());
+ postings.assign_generation(_handler.getCurrentGeneration());
_handler.incGeneration();
}
void
AttributePostingListTest::
-removeOldGenerations(Tree &tree,
+reclaim_memory(Tree &tree,
ValueHandle &valueHandle,
PostingList &postings,
PostingListNodeAllocator &postingsAlloc)
{
(void) tree;
(void) valueHandle;
- postingsAlloc.trimHoldLists(_handler.getFirstUsedGeneration());
- postings.trimHoldLists(_handler.getFirstUsedGeneration());
+ postingsAlloc.reclaim_memory(_handler.get_oldest_used_generation());
+ postings.reclaim_memory(_handler.get_oldest_used_generation());
}
int
@@ -689,7 +689,7 @@ AttributePostingListTest::Main()
lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
_stlTree, _randomValues);
_intNodeAlloc->freeze();
- _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration());
+ _intNodeAlloc->assign_generation(_handler.getCurrentGeneration());
doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore);
removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
_stlTree, _randomValues);
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 222a3341ef9..9127c4b59fc 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -221,11 +221,11 @@ public:
auto vector = _vectors.get_vector(docid).typify<double>();
_removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
}
- void transfer_hold_lists(generation_t current_gen) override {
+ void assign_generation(generation_t current_gen) override {
_transfer_gen = current_gen;
}
- void trim_hold_lists(generation_t first_used_gen) override {
- _trim_gen = first_used_gen;
+ void reclaim_memory(generation_t oldest_used_gen) override {
+ _trim_gen = oldest_used_gen;
}
bool consider_compact(const CompactionStrategy&) override {
return false;
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
index 6e60d14b8ff..8feb7b7e287 100644
--- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
@@ -1,14 +1,20 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/diskindex/fusion.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/common/flush_token.h>
#include <vespa/searchlib/diskindex/diskindex.h>
-#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/diskindex/zcposoccrandread.h>
#include <vespa/searchlib/fef/fieldpositionsiterator.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/index/schemautil.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
#include <vespa/searchlib/memoryindex/document_inverter_context.h>
@@ -31,7 +37,10 @@ LOG_SETUP("fusion_test");
namespace search {
+using document::ArrayFieldValue;
using document::Document;
+using document::StringFieldValue;
+using document::WeightedSetFieldValue;
using fef::FieldPositionsIterator;
using fef::TermFieldMatchData;
using fef::TermFieldMatchDataArray;
@@ -110,26 +119,20 @@ toString(FieldPositionsIterator posItr, bool hasElements = false, bool hasWeight
}
std::unique_ptr<Document>
-make_doc10(DocBuilder &b)
+make_doc10(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- addStr("e").addStr("f").addStr("z").
- endField();
- b.startIndexField("f1").
- addStr("w").addStr("x").
- addStr("y").addStr("z").
- endField();
- b.startIndexField("f2").
- startElement(4).addStr("ax").addStr("ay").addStr("z").endElement().
- startElement(5).addStr("ax").endElement().
- endField();
- b.startIndexField("f3").
- startElement(4).addStr("wx").addStr("z").endElement().
- endField();
-
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StringFieldBuilder sfb(b);
+ doc->setValue("f0", sfb.tokenize("a b c d e f z").build());
+ doc->setValue("f1", sfb.tokenize("w x y z").build());
+ ArrayFieldValue string_array(b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("ax ay z").build());
+ string_array.add(sfb.tokenize("ax").build());
+ doc->setValue("f2", string_array);
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("wx z").build(), 4);
+ doc->setValue("f3", string_wset);
+ return doc;
}
Schema::IndexField
@@ -151,6 +154,18 @@ make_schema(bool interleaved_features)
return schema;
}
+EmptyDocBuilder::AddFieldsType
+make_add_fields()
+{
+ return [](auto& header) { using namespace document::config_builder;
+ using DataType = document::DataType;
+ header.addField("f0", DataType::T_STRING)
+ .addField("f1", DataType::T_STRING)
+ .addField("f2", Array(DataType::T_STRING))
+ .addField("f3", Wset(DataType::T_STRING));
+ };
+}
+
void
assert_interleaved_features(DiskIndex &d, const vespalib::string &field, const vespalib::string &term, uint32_t doc_id, uint32_t exp_num_occs, uint32_t exp_field_length)
{
@@ -327,7 +342,8 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
addField("f2").addField("f3").
addField("f4"));
FieldIndexCollection fic(schema, MockFieldLengthInspector());
- DocBuilder b(schema);
+ EmptyDocBuilder b(make_add_fields());
+ StringFieldBuilder sfb(b);
auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2);
auto pushThreads = SequencedTaskExecutor::create(push_executor, 2);
DocumentInverterContext inv_context(schema, *invertThreads, *pushThreads, fic);
@@ -338,19 +354,21 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
inv.invertDocument(10, *doc, {});
myPushDocument(inv);
- b.startDocument("id:ns:searchdocument::11").
- startIndexField("f3").
- startElement(-27).addStr("zz").endElement().
- endField();
- doc = b.endDocument();
+ doc = b.make_document("id:ns:searchdocument::11");
+ {
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.word("zz").build(), -27);
+ doc->setValue("f3", string_wset);
+ }
inv.invertDocument(11, *doc, {});
myPushDocument(inv);
- b.startDocument("id:ns:searchdocument::12").
- startIndexField("f3").
- startElement(0).addStr("zz0").endElement().
- endField();
- doc = b.endDocument();
+ doc = b.make_document("id:ns:searchdocument::12");
+ {
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.word("zz0").build(), 0);
+ doc->setValue("f3", string_wset);
+ }
inv.invertDocument(12, *doc, {});
myPushDocument(inv);
@@ -468,7 +486,7 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng
FieldIndexCollection fic(_schema, field_length_inspector);
uint32_t numDocs = 20;
uint32_t numWords = 1000;
- DocBuilder b(_schema);
+ EmptyDocBuilder b(make_add_fields());
auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2);
auto pushThreads = SequencedTaskExecutor::create(push_executor, 2);
DocumentInverterContext inv_context(_schema, *invertThreads, *pushThreads, fic);
diff --git a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore
deleted file mode 100644
index 999644fce87..00000000000
--- a/searchlib/src/tests/index/docbuilder/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*_test
-.depend
-Makefile
-docbuilder_test
-searchlib_docbuilder_test_app
diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt
deleted file mode 100644
index 7a969f602ea..00000000000
--- a/searchlib/src/tests/index/docbuilder/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_docbuilder_test_app TEST
- SOURCES
- docbuilder_test.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app)
diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
deleted file mode 100644
index f76b61dcb78..00000000000
--- a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
+++ /dev/null
@@ -1,437 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/log/log.h>
-LOG_SETUP("docbuilder_test");
-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <vespa/searchlib/index/docbuilder.h>
-#include <vespa/vespalib/encoding/base64.h>
-#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <iostream>
-
-using namespace document;
-using search::index::schema::CollectionType;
-
-namespace search::index {
-
-namespace
-{
-std::string empty;
-}
-
-namespace linguistics
-{
-const vespalib::string SPANTREE_NAME("linguistics");
-}
-
-
-TEST("test docBuilder")
-{
- Schema s;
- s.addIndexField(Schema::IndexField("ia", schema::DataType::STRING));
- s.addIndexField(Schema::IndexField("ib", schema::DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("ic", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addUriIndexFields(Schema::IndexField("iu", schema::DataType::STRING));
- s.addUriIndexFields(Schema::IndexField("iau", schema::DataType::STRING, CollectionType::ARRAY));
- s.addUriIndexFields(Schema::IndexField("iwu", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("aa", schema::DataType::INT32));
- s.addAttributeField(Schema::AttributeField("ab", schema::DataType::FLOAT));
- s.addAttributeField(Schema::AttributeField("ac", schema::DataType::STRING));
- s.addAttributeField(Schema::AttributeField("ad", schema::DataType::INT32, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("ae", schema::DataType::FLOAT, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("af", schema::DataType::STRING, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("ag", schema::DataType::INT32, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("ah", schema::DataType::FLOAT, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("ai", schema::DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("asp1", schema::DataType::INT32));
- s.addAttributeField(Schema::AttributeField("asp2", schema::DataType::INT64));
- s.addAttributeField(Schema::AttributeField("aap1", schema::DataType::INT32, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("aap2", schema::DataType::INT64, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("awp1", schema::DataType::INT32, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("awp2", schema::DataType::INT64, CollectionType::WEIGHTEDSET));
-
- DocBuilder b(s);
- Document::UP doc;
- std::vector<std::string> lines;
- std::vector<std::string>::const_iterator itr;
- std::string xml;
-
- { // empty
- doc = b.startDocument("id:ns:searchdocument::0").endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::0\"/>", *itr++);
- EXPECT_EQUAL("", *itr++);
- EXPECT_TRUE(itr == lines.end());
- }
- { // all fields set
- std::vector<char> binaryBlob;
- binaryBlob.push_back('\0');
- binaryBlob.push_back('\2');
- binaryBlob.push_back('\1');
- std::string raw1s("Single Raw Element");
- std::string raw1a0("Array Raw Element 0");
- std::string raw1a1("Array Raw Element 1");
- std::string raw1w0("Weighted Set Raw Element 0");
- std::string raw1w1("Weighted Set Raw Element 1");
- raw1s += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1a0 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1a1 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1w0 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- raw1w1 += std::string(&binaryBlob[0],
- &binaryBlob[0] + binaryBlob.size());
- b.startDocument("id:ns:searchdocument::1");
- b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField();
- b.startIndexField("ib").startElement().addStr("foo").endElement().
- startElement(1).addStr("bar").addStr("baz").endElement().endField();
- b. startIndexField("ic").
- startElement(20).addStr("bar").addStr("baz").endElement().
- startElement().addStr("foo").endElement().
- endField();
- b.startIndexField("iu").
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("81").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("4").
- endSubField().
- endField();
- b.startIndexField("iau").
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("8").
- endSubField().
- endElement().
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("9").
- endSubField().
- endElement().
- endField();
- b.startIndexField("iwu").
- startElement(4).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("83").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("12").
- endSubField().
- endElement().
- startElement(7).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("85").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("13").
- endSubField().
- endElement().
- endField();
- b.startAttributeField("aa").addInt(2147483647).endField();
- b.startAttributeField("ab").addFloat(1234.56).endField();
- b.startAttributeField("ac").addStr("foo baz").endField();
- b.startAttributeField("ad").startElement().addInt(10).endElement().endField();
- b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField();
- b.startAttributeField("af").startElement().addStr("foo").endElement().endField();
- b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField();
- b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField();
- b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField();
- b.startAttributeField("asp1").addInt(1001).endField();
- b.startAttributeField("asp2").addPosition(1002, 1003).endField();
- b.startAttributeField("aap1").
- startElement().addInt(1004).endElement().
- startElement().addInt(1005).endElement().
- endField();
- b.startAttributeField("aap2").
- startElement().addPosition(1006, 1007).endElement().
- startElement().addPosition(1008, 1009).endElement().
- endField();
- b.startAttributeField("awp1").
- startElement(41).addInt(1010).endElement().
- startElement(42).addInt(1011).endElement().
- endField();
- b.startAttributeField("awp2").
- startElement(43).addPosition(1012, 1013).endElement().
- startElement(44).addPosition(1014, 1015).endElement().
- endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::1\">", *itr++);
- EXPECT_EQUAL("<iu>", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:81/fluke?ab=2#4</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>81</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>4</fragment>", *itr++);
- EXPECT_EQUAL("</iu>", *itr++);
- EXPECT_EQUAL("<aa>2147483647</aa>", *itr++);
- EXPECT_EQUAL("<aap2>", *itr++);
- EXPECT_EQUAL("<item>1047806</item>", *itr++);
- EXPECT_EQUAL("<item>1048322</item>", *itr++);
- EXPECT_EQUAL("</aap2>", *itr++);
- EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++);
- EXPECT_EQUAL("<ae>", *itr++);
- EXPECT_EQUAL("<item>10.5</item>", *itr++);
- EXPECT_EQUAL("</ae>", *itr++);
- EXPECT_EQUAL("<ib>", *itr++);
- EXPECT_EQUAL("<item>foo</item>", *itr++);
- EXPECT_EQUAL("<item>bar baz</item>", *itr++);
- EXPECT_EQUAL("</ib>", *itr++);
- EXPECT_EQUAL("<ah>", *itr++);
- EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++);
- EXPECT_EQUAL("</ah>", *itr++);
- EXPECT_EQUAL("<ic>", *itr++);
- EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++);
- EXPECT_EQUAL("</ic>", *itr++);
- EXPECT_EQUAL("<ac>foo baz</ac>", *itr++);
- EXPECT_EQUAL("<awp2>", *itr++);
- EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++);
- EXPECT_EQUAL("</awp2>", *itr++);
- EXPECT_EQUAL("<iau>", *itr++);
- EXPECT_EQUAL("<item>", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:82/fluke?ab=2#8</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>82</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>8</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("<item>", *itr++);
- EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++);
- EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>82</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>9</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("</iau>", *itr++);
- EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++);
- EXPECT_EQUAL("<ai>", *itr++);
- EXPECT_EQUAL("<item weight=\"4\">bar</item>", *itr++);
- EXPECT_EQUAL("</ai>", *itr++);
- EXPECT_EQUAL("<asp1>1001</asp1>", *itr++);
- EXPECT_EQUAL("<ad>", *itr++);
- EXPECT_EQUAL("<item>10</item>", *itr++);
- EXPECT_EQUAL("</ad>", *itr++);
- EXPECT_EQUAL("<iwu>", *itr++);
- EXPECT_EQUAL("<item weight=\"4\">", *itr++);
- EXPECT_EQUAL("<all>http://www.example.com:83/fluke?ab=2#12</all>", *itr++);
- EXPECT_EQUAL("<host>www.example.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>83</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>12</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"7\">", *itr++);
- EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++);
- EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
- EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
- EXPECT_EQUAL("<path>/fluke</path>", *itr++);
- EXPECT_EQUAL("<port>85</port>", *itr++);
- EXPECT_EQUAL("<query>ab=2</query>", *itr++);
- EXPECT_EQUAL("<fragment>13</fragment>", *itr++);
- EXPECT_EQUAL("</item>", *itr++);
- EXPECT_EQUAL("</iwu>", *itr++);
- EXPECT_EQUAL("<ab>1234.56</ab>", *itr++);
- EXPECT_EQUAL("<ag>", *itr++);
- EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++);
- EXPECT_EQUAL("</ag>", *itr++);
- EXPECT_EQUAL("<awp1>", *itr++);
- EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++);
- EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++);
- EXPECT_EQUAL("</awp1>", *itr++);
- EXPECT_EQUAL("<aap1>", *itr++);
- EXPECT_EQUAL("<item>1004</item>", *itr++);
- EXPECT_EQUAL("<item>1005</item>", *itr++);
- EXPECT_EQUAL("</aap1>", *itr++);
- EXPECT_EQUAL("<af>", *itr++);
- EXPECT_EQUAL("<item>foo</item>", *itr++);
- EXPECT_EQUAL("</af>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
-#if 0
- std::cout << "onedoc xml start -----" << std::endl <<
- xml << std::endl <<
- "-------" << std::endl;
- std::cout << "onedoc toString start ----" << std::endl <<
- doc->toString(true) << std::endl <<
- "-------" << std::endl;
-#endif
- }
- { // create one more to see that everything is cleared
- b.startDocument("id:ns:searchdocument::2");
- b.startIndexField("ia").addStr("yes").endField();
- b.startAttributeField("aa").addInt(20).endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::2\">", *itr++);
- EXPECT_EQUAL("<aa>20</aa>", *itr++);
- EXPECT_EQUAL("<ia>yes</ia>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
- }
- { // create field with cjk chars
- b.startDocument("id:ns:searchdocument::3");
- b.startIndexField("ia").
- addStr("我就是那个").
- setAutoSpace(false).
- addStr("大灰狼").
- setAutoSpace(true).
- endField();
- doc = b.endDocument();
- xml = doc->toXml("");
- boost::split(lines, xml, boost::is_any_of("\n"));
- itr = lines.begin();
- EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::3\">", *itr++);
- EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++);
- EXPECT_EQUAL("</document>", *itr++);
- EXPECT_TRUE(itr == lines.end());
- const FieldValue::UP iaval = doc->getValue("ia");
- ASSERT_TRUE(iaval.get() != NULL);
- const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *>
- (iaval.get());
- ASSERT_TRUE(iasval != NULL);
- StringFieldValue::SpanTrees trees = iasval->getSpanTrees();
- const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME);
- ASSERT_TRUE(tree != NULL);
- std::vector<Span> spans;
- std::vector<Span> expSpans;
- for (SpanTree::const_iterator i = tree->begin(), ie = tree->end();
- i != ie; ++i) {
- Annotation &ann = const_cast<Annotation &>(*i);
- const Span *span = dynamic_cast<const Span *>(ann.getSpanNode());
- if (span == NULL)
- continue;
- spans.push_back(*span);
- }
- expSpans.push_back(Span(0, 15));
- expSpans.push_back(Span(0, 15));
- expSpans.push_back(Span(15, 9));
- expSpans.push_back(Span(15, 9));
- ASSERT_TRUE(expSpans == spans);
-#if 0
- std::cout << "onedoc xml start -----" << std::endl <<
- xml << std::endl <<
- "-------" << std::endl;
- std::cout << "onedoc toString start ----" << std::endl <<
- doc->toString(true) << std::endl <<
- "-------" << std::endl;
-#endif
- }
-}
-
-TEST("test if index names are valid uri parts") {
- EXPECT_FALSE(UriField::mightBePartofUri("all"));
- EXPECT_FALSE(UriField::mightBePartofUri("fragment"));
- EXPECT_FALSE(UriField::mightBePartofUri(".all"));
- EXPECT_FALSE(UriField::mightBePartofUri("all.b"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.all"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.scheme"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.host"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.port"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.hostname"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.path"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.query"));
- EXPECT_TRUE(UriField::mightBePartofUri("b.fragment"));
-}
-
-}
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore
deleted file mode 100644
index f15be1efcfe..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*_test
-.depend
-Makefile
-doctypebuilder_test
-searchlib_doctypebuilder_test_app
diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
deleted file mode 100644
index 348ecde5a7c..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_doctypebuilder_test_app TEST
- SOURCES
- doctypebuilder_test.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app)
diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
deleted file mode 100644
index 95854fa11b2..00000000000
--- a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/document/repo/documenttyperepo.h>
-#include <vespa/searchlib/index/doctypebuilder.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/vespalib/testkit/testapp.h>
-
-using namespace document;
-
-namespace search {
-namespace index {
-
-using schema::CollectionType;
-using schema::DataType;
-
-TEST("testSearchDocType") {
- Schema s;
- s.addIndexField(Schema::IndexField("ia", DataType::STRING));
- s.addIndexField(Schema::IndexField("ib", DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("ic", DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addUriIndexFields(Schema::IndexField("iu", DataType::STRING));
- s.addUriIndexFields(Schema::IndexField("iau", DataType::STRING, CollectionType::ARRAY));
- s.addUriIndexFields(Schema::IndexField("iwu", DataType::STRING, CollectionType::WEIGHTEDSET));
- s.addAttributeField(Schema::AttributeField("aa", DataType::INT32));
- s.addAttributeField(Schema::AttributeField("spos", DataType::INT64));
- s.addAttributeField(Schema::AttributeField("apos", DataType::INT64, CollectionType::ARRAY));
- s.addAttributeField(Schema::AttributeField("wpos", DataType::INT64, CollectionType::WEIGHTEDSET));
-
- DocTypeBuilder docTypeBuilder(s);
- document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig();
- DocumentTypeRepo repo(config);
- const DocumentType *docType = repo.getDocumentType("searchdocument");
- ASSERT_TRUE(docType);
- EXPECT_EQUAL(10u, docType->getFieldCount());
-
- EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName());
- EXPECT_EQUAL("Array<String>",
- docType->getField("ib").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<String>",
- docType->getField("ic").getDataType().getName());
- EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName());
- EXPECT_EQUAL("Array<url>",
- docType->getField("iau").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<url>",
- docType->getField("iwu").getDataType().getName());
-
- EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName());
- EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName());
- EXPECT_EQUAL("Array<Long>",
- docType->getField("apos").getDataType().getName());
- EXPECT_EQUAL("WeightedSet<Long>",
- docType->getField("wpos").getDataType().getName());
-}
-
-TEST("require that multiple fields can have the same type") {
- Schema s;
- s.addIndexField(Schema::IndexField("array1", DataType::STRING, CollectionType::ARRAY));
- s.addIndexField(Schema::IndexField("array2", DataType::STRING, CollectionType::ARRAY));
- DocTypeBuilder docTypeBuilder(s);
- document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig();
- DocumentTypeRepo repo(config);
- const DocumentType *docType = repo.getDocumentType("searchdocument");
- ASSERT_TRUE(docType);
- EXPECT_EQUAL(2u, docType->getFieldCount());
-
- EXPECT_EQUAL("Array<String>",
- docType->getField("array1").getDataType().getName());
- EXPECT_EQUAL("Array<String>",
- docType->getField("array2").getDataType().getName());
-}
-
-} // namespace index
-} // namespace search
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/index/string_field_builder/CMakeLists.txt b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt
new file mode 100644
index 00000000000..f8774eae5ca
--- /dev/null
+++ b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_string_field_builder_test_app TEST
+ SOURCES
+ string_field_builder_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_string_field_builder_test_app COMMAND searchlib_string_field_builder_test_app)
diff --git a/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp
new file mode 100644
index 00000000000..8c2b641f724
--- /dev/null
+++ b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp
@@ -0,0 +1,141 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/index/string_field_builder.h>
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/datatype/annotationtype.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <cassert>
+#include <iostream>
+#include <optional>
+#include <utility>
+#include <vector>
+
+using document::Annotation;
+using document::AnnotationType;
+using document::Span;
+using document::SpanNode;
+using document::SpanTree;
+using document::StringFieldValue;
+using search::index::EmptyDocBuilder;
+using search::index::StringFieldBuilder;
+
+namespace
+{
+
+const vespalib::string SPANTREE_NAME("linguistics");
+
+/**
+ * Flattened, comparable form of one annotation as used by these tests:
+ * the start and length of its span plus an optional label. The label is
+ * only set through the three-argument constructor.
+ */
+struct MyAnnotation {
+    int32_t start;
+    int32_t length;
+    std::optional<vespalib::string> label;
+
+    /** Annotation without a label. */
+    MyAnnotation(int32_t start_in, int32_t length_in) noexcept
+        : start(start_in),
+          length(length_in),
+          label()
+    {
+    }
+
+    /**
+     * Annotation with a label. label_in is a by-value sink parameter, so it
+     * is moved into place: copying it here could allocate, and a throwing
+     * copy inside a noexcept constructor would terminate the process.
+     */
+    MyAnnotation(int32_t start_in, int32_t length_in, vespalib::string label_in) noexcept
+        : start(start_in),
+          length(length_in),
+          label(std::move(label_in))
+    {
+    }
+
+    bool operator==(const MyAnnotation& rhs) const noexcept;
+};
+
+// Two annotations are equal when start, length and label all match
+// (an absent label only equals another absent label).
+bool
+MyAnnotation::operator==(const MyAnnotation& rhs) const noexcept
+{
+    if (start != rhs.start || length != rhs.length) {
+        return false;
+    }
+    return label == rhs.label;
+}
+
+
+// Prints an annotation as [start,length], followed by ("label") when a label is present.
+std::ostream& operator<<(std::ostream& os, const MyAnnotation& ann) {
+    os << "[" << ann.start << "," << ann.length << "]";
+    if (!ann.label) {
+        return os;
+    }
+    os << "(\"" << *ann.label << "\")";
+    return os;
+}
+
+}
+
+// Test fixture: owns an EmptyDocBuilder and the StringFieldBuilder under test,
+// plus helpers for extracting and checking annotations on a StringFieldValue.
+class StringFieldBuilderTest : public testing::Test
+{
+protected:
+ EmptyDocBuilder edb;
+ // Constructed from edb in the constructor; members initialize in declaration
+ // order, so keep this declared after edb.
+ StringFieldBuilder sfb;
+ StringFieldBuilderTest();
+ ~StringFieldBuilderTest();
+ // Flattens the annotations of val into comparable MyAnnotation values.
+ std::vector<MyAnnotation> get_annotations(const StringFieldValue& val);
+ // Expects val to carry the annotations in exp and the plain text in plain.
+ void assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val);
+};
+
+// Builds the fixture: sfb is constructed on top of the fixture's own edb.
+StringFieldBuilderTest::StringFieldBuilderTest()
+    : testing::Test(), edb(), sfb(edb)
+{
+}
+
+StringFieldBuilderTest::~StringFieldBuilderTest() = default;
+
+// Collects the annotations found in the span tree named SPANTREE_NAME.
+// Returns an empty vector when val has no such tree. Annotations whose span
+// node is not a Span are skipped; an annotation's field value, when present,
+// becomes the label of the resulting MyAnnotation.
+std::vector<MyAnnotation>
+StringFieldBuilderTest::get_annotations(const StringFieldValue& val)
+{
+    std::vector<MyAnnotation> annotations;
+    StringFieldValue::SpanTrees span_trees = val.getSpanTrees();
+    const auto* linguistics_tree = StringFieldValue::findTree(span_trees, SPANTREE_NAME);
+    if (linguistics_tree == nullptr) {
+        return annotations;
+    }
+    for (auto& annotation : *linguistics_tree) {
+        assert(annotation.getType() == *AnnotationType::TERM);
+        auto span = dynamic_cast<const Span *>(annotation.getSpanNode());
+        if (span != nullptr) {
+            auto label_fv = annotation.getFieldValue();
+            if (label_fv != nullptr) {
+                annotations.emplace_back(span->from(), span->length(), dynamic_cast<const StringFieldValue &>(*label_fv).getValue());
+            } else {
+                annotations.emplace_back(span->from(), span->length());
+            }
+        }
+    }
+    return annotations;
+}
+
+// Checks that val carries exactly the annotations in exp (as extracted by
+// get_annotations) and that its plain text equals plain. Both checks use
+// EXPECT_EQ, so despite the assert_ name a mismatch is reported without
+// aborting the calling test.
+void
+StringFieldBuilderTest::assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val)
+{
+ EXPECT_EQ(exp, get_annotations(val));
+ EXPECT_EQ(plain, val.getValue());
+}
+
+// A StringFieldValue created directly from text is expected to have no annotations.
+TEST_F(StringFieldBuilderTest, no_annotations)
+{
+    const std::vector<MyAnnotation> expected;
+    assert_annotations(expected, "foo", StringFieldValue("foo"));
+}
+
+// word() is expected to produce one annotation covering the whole word.
+TEST_F(StringFieldBuilderTest, single_word)
+{
+    const std::vector<MyAnnotation> expected{{0, 4}};
+    assert_annotations(expected, "word", sfb.word("word").build());
+}
+
+// tokenize() is expected to annotate each space-separated word with its own [start,length] span.
+TEST_F(StringFieldBuilderTest, tokenize)
+{
+    const std::vector<MyAnnotation> expected{{0, 4}, {5, 2}, {8, 1}, {10, 4}};
+    assert_annotations(expected, "this is a test", sfb.tokenize("this is a test").build());
+}
+
+// alt_word() is expected to add a labelled annotation on the same span as the preceding word.
+TEST_F(StringFieldBuilderTest, alt_word)
+{
+    const std::vector<MyAnnotation> expected{{0, 3}, {4, 3}, {4, 3, "baz"}};
+    assert_annotations(expected, "foo bar", sfb.word("foo").space().word("bar").alt_word("baz").build());
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
index 3f8a04d9460..83746b611fb 100644
--- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
@@ -1,8 +1,13 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/searchlib/index/docbuilder.h>
-#include <vespa/searchlib/index/field_length_calculator.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
+#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
+#include <vespa/searchlib/index/field_length_calculator.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_remover.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
@@ -19,9 +24,10 @@
namespace search::memoryindex {
using document::Document;
-using index::DocBuilder;
+using index::EmptyDocBuilder;
using index::FieldLengthCalculator;
using index::Schema;
+using index::StringFieldBuilder;
using index::schema::CollectionType;
using index::schema::DataType;
using vespalib::SequencedTaskExecutor;
@@ -29,64 +35,68 @@ using vespalib::ISequencedTaskExecutor;
namespace {
+// Returns the field-declaration callback passed to EmptyDocBuilder by this
+// test: f0 and f1 as strings, f2 as an array of strings and f3 as a weighted
+// set of strings.
+EmptyDocBuilder::AddFieldsType
+make_add_fields()
+{
+    return [](auto& header) {
+        using namespace document::config_builder;
+        using DataType = document::DataType;
+        header.addField("f0", DataType::T_STRING)
+              .addField("f1", DataType::T_STRING)
+              .addField("f2", Array(DataType::T_STRING))
+              .addField("f3", Wset(DataType::T_STRING));
+    };
+}
+
Document::UP
-makeDoc10(DocBuilder &b)
+makeDoc10(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.tokenize("a b c d").build());
+ return doc;
}
Document::UP
-makeDoc11(DocBuilder &b)
+makeDoc11(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::11");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("e").addStr("f").
- endField();
- b.startIndexField("f1").
- addStr("a").addStr("g").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::11");
+ doc->setValue("f0", sfb.tokenize("a b e f").build());
+ doc->setValue("f1", sfb.tokenize("a g").build());
+ return doc;
}
Document::UP
-makeDoc12(DocBuilder &b)
+makeDoc12(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::12");
- b.startIndexField("f0").
- addStr("h").addStr("doc12").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::12");
+ doc->setValue("f0", sfb.tokenize("h doc12").build());
+ return doc;
}
Document::UP
-makeDoc13(DocBuilder &b)
+makeDoc13(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::13");
- b.startIndexField("f0").
- addStr("i").addStr("doc13").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::13");
+ doc->setValue("f0", sfb.tokenize("i doc13").build());
+ return doc;
}
Document::UP
-makeDoc14(DocBuilder &b)
+makeDoc14(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::14");
- b.startIndexField("f0").
- addStr("j").addStr("doc14").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::14");
+ doc->setValue("f0", sfb.tokenize("j doc14").build());
+ return doc;
}
Document::UP
-makeDoc15(DocBuilder &b)
+makeDoc15(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::15");
- return b.endDocument();
+ return b.make_document("id:ns:searchdocument::15");
}
}
@@ -96,7 +106,7 @@ VESPA_THREAD_STACK_TAG(push_executor)
struct DocumentInverterTest : public ::testing::Test {
Schema _schema;
- DocBuilder _b;
+ EmptyDocBuilder _b;
std::unique_ptr<ISequencedTaskExecutor> _invertThreads;
std::unique_ptr<ISequencedTaskExecutor> _pushThreads;
WordStore _word_store;
@@ -118,7 +128,7 @@ struct DocumentInverterTest : public ::testing::Test {
DocumentInverterTest()
: _schema(makeSchema()),
- _b(_schema),
+ _b(make_add_fields()),
_invertThreads(SequencedTaskExecutor::create(invert_executor, 1)),
_pushThreads(SequencedTaskExecutor::create(push_executor, 1)),
_word_store(),
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index dcca1f136f6..04d1f08db6f 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -1,13 +1,22 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/structfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/diskindex/zcposoccrandread.h>
#include <vespa/searchlib/fef/fieldpositionsiterator.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_collection.h>
@@ -37,7 +46,11 @@ namespace search {
using namespace fef;
using namespace index;
+using document::ArrayFieldValue;
using document::Document;
+using document::StructFieldValue;
+using document::UrlDataType;
+using document::WeightedSetFieldValue;
using queryeval::RankedSearchIteratorBase;
using queryeval::SearchIterator;
using search::index::schema::CollectionType;
@@ -505,6 +518,12 @@ make_single_field_schema()
return result;
}
+// Returns a field-declaration callback that adds a single string field, f0.
+EmptyDocBuilder::AddFieldsType
+make_single_add_fields()
+{
+    return [](auto& header) {
+        header.addField("f0", document::DataType::T_STRING);
+    };
+}
+
template <typename FieldIndexType>
struct FieldIndexTest : public ::testing::Test {
Schema schema;
@@ -706,6 +725,18 @@ make_multi_field_schema()
return result;
}
+// Returns a field-declaration callback that adds f0 and f1 as strings,
+// f2 as an array of strings and f3 as a weighted set of strings.
+EmptyDocBuilder::AddFieldsType
+make_multi_field_add_fields()
+{
+    return [](auto& header) {
+        using namespace document::config_builder;
+        using DataType = document::DataType;
+        header.addField("f0", DataType::T_STRING)
+              .addField("f1", DataType::T_STRING)
+              .addField("f2", Array(DataType::T_STRING))
+              .addField("f3", Wset(DataType::T_STRING));
+    };
+}
+
struct FieldIndexCollectionTest : public ::testing::Test {
Schema schema;
FieldIndexCollection fic;
@@ -907,16 +938,16 @@ class InverterTest : public ::testing::Test {
public:
Schema _schema;
FieldIndexCollection _fic;
- DocBuilder _b;
+ EmptyDocBuilder _b;
std::unique_ptr<ISequencedTaskExecutor> _invertThreads;
std::unique_ptr<ISequencedTaskExecutor> _pushThreads;
DocumentInverterContext _inv_context;
DocumentInverter _inv;
- InverterTest(const Schema& schema)
+ InverterTest(const Schema& schema, EmptyDocBuilder::AddFieldsType add_fields)
: _schema(schema),
_fic(_schema, MockFieldLengthInspector()),
- _b(_schema),
+ _b(add_fields),
_invertThreads(SequencedTaskExecutor::create(invert_executor, 2)),
_pushThreads(SequencedTaskExecutor::create(push_executor, 2)),
_inv_context(_schema, *_invertThreads, *_pushThreads, _fic),
@@ -938,91 +969,63 @@ public:
class BasicInverterTest : public InverterTest {
public:
- BasicInverterTest() : InverterTest(make_multi_field_schema()) {}
+ BasicInverterTest() : InverterTest(make_multi_field_schema(), make_multi_field_add_fields()) {}
};
TEST_F(BasicInverterTest, require_that_inversion_is_working)
{
Document::UP doc;
+ StringFieldBuilder sfb(_b);
- _b.startDocument("id:ns:searchdocument::10");
- _b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.tokenize("a b c d").build());
_inv.invertDocument(10, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::20");
- _b.startIndexField("f0").
- addStr("a").addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::20");
+ doc->setValue("f0", sfb.tokenize("a a b c d").build());
_inv.invertDocument(20, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::30");
- _b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- addStr("e").addStr("f").
- endField();
- _b.startIndexField("f1").
- addStr("\nw2").addStr("w").addStr("x").
- addStr("\nw3").addStr("y").addStr("z").
- endField();
- _b.startIndexField("f2").
- startElement(4).
- addStr("w").addStr("x").
- endElement().
- startElement(5).
- addStr("y").addStr("z").
- endElement().
- endField();
- _b.startIndexField("f3").
- startElement(6).
- addStr("w").addStr("x").
- endElement().
- startElement(7).
- addStr("y").addStr("z").
- endElement().
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::30");
+ doc->setValue("f0", sfb.tokenize("a b c d e f").build());
+ doc->setValue("f1", sfb.word("\nw2").tokenize(" w x ").
+ word("\nw3").tokenize(" y z").build());
+ {
+ ArrayFieldValue string_array(_b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("w x").build());
+ string_array.add(sfb.tokenize("y z").build());
+ doc->setValue("f2", string_array);
+ }
+ {
+ WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("w x").build(), 6);
+ string_wset.add(sfb.tokenize("y z").build(), 7);
+ doc->setValue("f3", string_wset);
+ }
_inv.invertDocument(30, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::40");
- _b.startIndexField("f0").
- addStr("a").addStr("a").addStr("b").addStr("c").addStr("a").
- addStr("e").addStr("f").
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::40");
+ doc->setValue("f0", sfb.tokenize("a a b c a e f").build());
_inv.invertDocument(40, *doc, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::999");
- _b.startIndexField("f0").
- addStr("this").addStr("is").addStr("_a_").addStr("test").
- addStr("for").addStr("insertion").addStr("speed").addStr("with").
- addStr("more").addStr("than").addStr("just").addStr("__a__").
- addStr("few").addStr("words").addStr("present").addStr("in").
- addStr("some").addStr("of").addStr("the").addStr("fields").
- endField();
- _b.startIndexField("f1").
- addStr("the").addStr("other").addStr("field").addStr("also").
- addStr("has").addStr("some").addStr("content").
- endField();
- _b.startIndexField("f2").
- startElement(1).
- addStr("strange").addStr("things").addStr("here").
- addStr("has").addStr("some").addStr("content").
- endElement().
- endField();
- _b.startIndexField("f3").
- startElement(3).
- addStr("not").addStr("a").addStr("weighty").addStr("argument").
- endElement().
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::999");
+ doc->setValue("f0", sfb.tokenize("this is ").word("_a_").
+ tokenize(" test for insertion speed with more than just ").
+ word("__a__").tokenize(" few words present in some of the fields").build());
+ doc->setValue("f1", sfb.tokenize("the other field also has some content").build());
+ {
+ ArrayFieldValue string_array(_b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("strange things here has some content").build());
+ doc->setValue("f2", string_array);
+ }
+ {
+ WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("not a weighty argument").build(), 3);
+ doc->setValue("f3", string_wset);
+ }
for (uint32_t docId = 10000; docId < 20000; ++docId) {
_inv.invertDocument(docId, *doc, {});
myPushDocument(_inv);
@@ -1132,19 +1135,17 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working)
TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remover)
{
- Document::UP doc;
+ StringFieldBuilder sfb(_b);
- _b.startDocument("id:ns:searchdocument::1");
- _b.startIndexField("f0").addStr("a").addStr("b").endField();
- _b.startIndexField("f1").addStr("a").addStr("c").endField();
- Document::UP doc1 = _b.endDocument();
- _inv.invertDocument(1, *doc1.get(), {});
+ auto doc1 = _b.make_document("id:ns:searchdocument::1");
+ doc1->setValue("f0", sfb.tokenize("a b").build());
+ doc1->setValue("f1", sfb.tokenize("a c").build());
+ _inv.invertDocument(1, *doc1, {});
myPushDocument(_inv);
- _b.startDocument("id:ns:searchdocument::2");
- _b.startIndexField("f0").addStr("b").addStr("c").endField();
- Document::UP doc2 = _b.endDocument();
- _inv.invertDocument(2, *doc2.get(), {});
+ auto doc2 = _b.make_document("id:ns:searchdocument::2");
+ doc2->setValue("f0", sfb.tokenize("b c").build());
+ _inv.invertDocument(2, *doc2, {});
myPushDocument(_inv);
EXPECT_TRUE(assertPostingList("[1]", find("a", 0)));
@@ -1172,136 +1173,71 @@ make_uri_schema()
return result;
}
+EmptyDocBuilder::AddFieldsType
+make_uri_add_fields()
+{
+ return [](auto& header) { using namespace document::config_builder;
+ header.addField("iu", UrlDataType::getInstance().getId())
+ .addField("iau", Array(UrlDataType::getInstance().getId()))
+ .addField("iwu", Wset(UrlDataType::getInstance().getId()));
+ };
+}
+
class UriInverterTest : public InverterTest {
public:
- UriInverterTest() : InverterTest(make_uri_schema()) {}
+ UriInverterTest() : InverterTest(make_uri_schema(), make_uri_add_fields()) {}
};
TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
{
Document::UP doc;
-
- _b.startDocument("id:ns:searchdocument::10");
- _b.startIndexField("iu").
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("81").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("4").
- endSubField().
- endField();
- _b.startIndexField("iau").
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("8").
- endSubField().
- endElement().
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("9").
- endSubField().
- endElement().
- endField();
- _b.startIndexField("iwu").
- startElement(4).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("83").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("12").
- endSubField().
- endElement().
- startElement(7).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("85").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("13").
- endSubField().
- endElement().
- endField();
- doc = _b.endDocument();
+ StringFieldBuilder sfb(_b);
+ sfb.url_mode(true);
+ StructFieldValue url_value(_b.get_data_type("url"));
+
+ doc = _b.make_document("id:ns:searchdocument::10");
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("81").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("4").build());
+ doc->setValue("iu", url_value);
+ ArrayFieldValue url_array(_b.get_data_type("Array<url>"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("82").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("8").build());
+ url_array.add(url_value);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("fragment", sfb.tokenize("9").build());
+ url_array.add(url_value);
+ doc->setValue("iau", url_array);
+ WeightedSetFieldValue url_wset(_b.get_data_type("WeightedSet<url>"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("83").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("12").build());
+ url_wset.add(url_value, 4);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("port", sfb.tokenize("85").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("13").build());
+ url_wset.add(url_value, 7);
+ doc->setValue("iwu", url_wset);
_inv.invertDocument(10, *doc, {});
myPushDocument(_inv);
@@ -1360,21 +1296,16 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
class CjkInverterTest : public InverterTest {
public:
- CjkInverterTest() : InverterTest(make_single_field_schema()) {}
+ CjkInverterTest() : InverterTest(make_single_field_schema(), make_single_add_fields()) {}
};
TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working)
{
Document::UP doc;
+ StringFieldBuilder sfb(_b);
- _b.startDocument("id:ns:searchdocument::10");
- _b.startIndexField("f0").
- addStr("我就是那个").
- setAutoSpace(false).
- addStr("大灰狼").
- setAutoSpace(true).
- endField();
- doc = _b.endDocument();
+ doc = _b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.word("我就是那个").word("大灰狼").build());
_inv.invertDocument(10, *doc, {});
myPushDocument(_inv);
diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
index ed049a82c42..bf3a911a579 100644
--- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
@@ -1,8 +1,14 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/field_length_calculator.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/field_index_remover.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/memoryindex/word_store.h>
@@ -13,9 +19,12 @@
namespace search {
+using document::ArrayFieldValue;
using document::Document;
-using index::DocBuilder;
+using document::WeightedSetFieldValue;
+using index::EmptyDocBuilder;
using index::Schema;
+using index::StringFieldBuilder;
using index::schema::CollectionType;
using index::schema::DataType;
@@ -26,93 +35,91 @@ namespace memoryindex {
namespace {
Document::UP
-makeDoc10(DocBuilder &b)
+makeDoc10(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("c").addStr("d").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ doc->setValue("f0", sfb.tokenize("a b c d").build());
+ return doc;
}
Document::UP
-makeDoc11(DocBuilder &b)
+makeDoc11(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::11");
- b.startIndexField("f0").
- addStr("a").addStr("b").addStr("e").addStr("f").
- endField();
- b.startIndexField("f1").
- addStr("a").addStr("g").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::11");
+ doc->setValue("f0", sfb.tokenize("a b e f").build());
+ doc->setValue("f1", sfb.tokenize("a g").build());
+ return doc;
}
Document::UP
-makeDoc12(DocBuilder &b)
+makeDoc12(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::12");
- b.startIndexField("f0").
- addStr("h").addStr("doc12").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::12");
+ doc->setValue("f0", sfb.tokenize("h doc12").build());
+ return doc;
}
Document::UP
-makeDoc13(DocBuilder &b)
+makeDoc13(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::13");
- b.startIndexField("f0").
- addStr("i").addStr("doc13").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::13");
+ doc->setValue("f0", sfb.tokenize("i doc13").build());
+ return doc;
}
Document::UP
-makeDoc14(DocBuilder &b)
+makeDoc14(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::14");
- b.startIndexField("f0").
- addStr("j").addStr("doc14").
- endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::14");
+ doc->setValue("f0", sfb.tokenize("j doc14").build());
+ return doc;
}
Document::UP
-makeDoc15(DocBuilder &b)
+makeDoc15(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::15");
- return b.endDocument();
+ return b.make_document("id:ns:searchdocument::15");
}
Document::UP
-makeDoc16(DocBuilder &b)
+makeDoc16(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::16");
- b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz").
- addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty").
- addStr("z").endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::16");
+ doc->setValue("f0", sfb.tokenize("foo bar baz").alt_word("altbaz").tokenize(" y").alt_word("alty").tokenize(" z").build());
+ return doc;
}
Document::UP
-makeDoc17(DocBuilder &b)
+makeDoc17(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::17");
- b.startIndexField("f1").addStr("foo0").addStr("bar0").endField();
- b.startIndexField("f2").startElement(1).addStr("foo").addStr("bar").endElement().startElement(1).addStr("bar").endElement().endField();
- b.startIndexField("f3").startElement(3).addStr("foo2").addStr("bar2").endElement().startElement(4).addStr("bar2").endElement().endField();
- return b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::17");
+ doc->setValue("f1", sfb.tokenize("foo0 bar0").build());
+ ArrayFieldValue string_array(b.get_data_type("Array<String>"));
+ string_array.add(sfb.tokenize("foo bar").build());
+ string_array.add(sfb.tokenize("bar").build());
+ doc->setValue("f2", string_array);
+ WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>"));
+ string_wset.add(sfb.tokenize("foo2 bar2").build(), 3);
+ string_wset.add(sfb.tokenize("bar2").build(), 4);
+ doc->setValue("f3", string_wset);
+ return doc;
}
vespalib::string corruptWord = "corruptWord";
Document::UP
-makeCorruptDocument(DocBuilder &b, size_t wordOffset)
+makeCorruptDocument(EmptyDocBuilder &b, size_t wordOffset)
{
- b.startDocument("id:ns:searchdocument::18");
- b.startIndexField("f0").addStr("before").addStr(corruptWord).addStr("after").addStr("z").endField();
- auto doc = b.endDocument();
+ StringFieldBuilder sfb(b);
+ auto doc = b.make_document("id:ns:searchdocument::18");
+ doc->setValue("f0", sfb.tokenize("before ").word(corruptWord).tokenize(" after z").build());
vespalib::nbostream stream;
doc->serialize(stream);
std::vector<char> raw;
@@ -127,14 +134,14 @@ makeCorruptDocument(DocBuilder &b, size_t wordOffset)
}
vespalib::nbostream badstream;
badstream.write(&raw[0], raw.size());
- return std::make_unique<Document>(*b.getDocumentTypeRepo(), badstream);
+ return std::make_unique<Document>(b.get_repo(), badstream);
}
}
struct FieldInverterTest : public ::testing::Test {
Schema _schema;
- DocBuilder _b;
+ EmptyDocBuilder _b;
WordStore _word_store;
FieldIndexRemover _remover;
test::OrderedFieldIndexInserterBackend _inserter_backend;
@@ -151,9 +158,21 @@ struct FieldInverterTest : public ::testing::Test {
return schema;
}
+ static EmptyDocBuilder::AddFieldsType
+ make_add_fields()
+ {
+ return [](auto& header) { using namespace document::config_builder;
+ using DataType = document::DataType;
+ header.addField("f0", DataType::T_STRING)
+ .addField("f1", DataType::T_STRING)
+ .addField("f2", Array(DataType::T_STRING))
+ .addField("f3", Wset(DataType::T_STRING));
+ };
+ }
+
FieldInverterTest()
: _schema(makeSchema()),
- _b(_schema),
+ _b(make_add_fields()),
_word_store(),
_remover(_word_store),
_inserter_backend(),
diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
index b3ea948dfa7..1730e34adb5 100644
--- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
@@ -1,11 +1,15 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/searchlib/common/scheduletaskcallback.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/i_field_length_inspector.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/memory_index.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
@@ -59,6 +63,12 @@ struct MySetup : public IFieldLengthInspector {
}
return FieldLengthInfo();
}
+ void add_fields(document::config_builder::Struct& header) const {
+ for (uint32_t i = 0; i < schema.getNumIndexFields(); ++i) {
+ auto& field = schema.getIndexField(i);
+ header.addField(field.getName(), document::DataType::T_STRING);
+ }
+ }
};
@@ -70,31 +80,38 @@ struct Index {
std::unique_ptr<ISequencedTaskExecutor> _invertThreads;
std::unique_ptr<ISequencedTaskExecutor> _pushThreads;
MemoryIndex index;
- DocBuilder builder;
+ EmptyDocBuilder builder;
+ StringFieldBuilder sfb;
+ std::unique_ptr<Document> builder_doc;
uint32_t docid;
std::string currentField;
+ bool add_space;
Index(const MySetup &setup);
~Index();
void closeField() {
if (!currentField.empty()) {
- builder.endField();
+ builder_doc->setValue(currentField, sfb.build());
currentField.clear();
}
}
Index &doc(uint32_t id) {
docid = id;
- builder.startDocument(vespalib::make_string("id:ns:searchdocument::%u", id));
+ builder_doc = builder.make_document(vespalib::make_string("id:ns:searchdocument::%u", id));
return *this;
}
Index &field(const std::string &name) {
closeField();
- builder.startIndexField(name);
currentField = name;
+ add_space = false;
return *this;
}
Index &add(const std::string &token) {
- builder.addStr(token);
+ if (add_space) {
+ sfb.space();
+ }
+ add_space = true;
+ sfb.word(token);
return *this;
}
void internalSyncCommit() {
@@ -106,7 +123,7 @@ struct Index {
}
Document::UP commit() {
closeField();
- Document::UP d = builder.endDocument();
+ Document::UP d = std::move(builder_doc);
index.insertDocument(docid, *d, {});
internalSyncCommit();
return d;
@@ -133,9 +150,12 @@ Index::Index(const MySetup &setup)
_invertThreads(SequencedTaskExecutor::create(invert_executor, 2)),
_pushThreads(SequencedTaskExecutor::create(push_executor, 2)),
index(schema, setup, *_invertThreads, *_pushThreads),
- builder(schema),
+ builder([&setup](auto& header) { setup.add_fields(header); }),
+ sfb(builder),
+ builder_doc(),
docid(1),
- currentField()
+ currentField(),
+ add_space(false)
{
}
Index::~Index() = default;
diff --git a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
index 969f483eef6..3995f06628c 100644
--- a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp
@@ -1,11 +1,21 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/memoryindex/url_field_inverter.h>
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/structfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
#include <vespa/document/repo/fixedtyperepo.h>
-#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/index/empty_doc_builder.h>
#include <vespa/searchlib/index/field_length_calculator.h>
+#include <vespa/searchlib/index/schema_index_fields.h>
+#include <vespa/searchlib/index/string_field_builder.h>
#include <vespa/searchlib/memoryindex/field_index_remover.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
-#include <vespa/searchlib/memoryindex/url_field_inverter.h>
#include <vespa/searchlib/memoryindex/word_store.h>
#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h>
#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter_backend.h>
@@ -14,6 +24,10 @@
namespace search {
using document::Document;
+using document::ArrayFieldValue;
+using document::StructFieldValue;
+using document::UrlDataType;
+using document::WeightedSetFieldValue;
using index::schema::CollectionType;
using index::schema::DataType;
@@ -26,160 +40,88 @@ namespace {
const vespalib::string url = "url";
Document::UP
-makeDoc10Single(DocBuilder &b)
+makeDoc10Single(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("url").
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("81").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- addTermAnnotation("altfluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("4").
- endSubField().
- endField();
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StructFieldValue url_value(b.get_data_type("url"));
+ StringFieldBuilder sfb(b);
+ sfb.url_mode(true);
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("81").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("4").build());
+ doc->setValue("url", url_value);
+ return doc;
}
Document::UP
-makeDoc10Array(DocBuilder &b)
+makeDoc10Array(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("url").
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- addTermAnnotation("altfluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("8").
- endSubField().
- endElement().
- startElement(1).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("82").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("9").
- endSubField().
- endElement().
- endField();
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StringFieldBuilder sfb(b);
+ sfb.url_mode(true);
+ ArrayFieldValue url_array(b.get_data_type("Array<url>"));
+ StructFieldValue url_value(b.get_data_type("url"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("82").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("8").build());
+ url_array.add(url_value);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("fragment", sfb.tokenize("9").build());
+ url_array.add(url_value);
+ doc->setValue("url", url_array);
+ return doc;
}
Document::UP
-makeDoc10WeightedSet(DocBuilder &b)
+makeDoc10WeightedSet(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- b.startIndexField("url").
- startElement(4).
- startSubField("all").
- addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.example.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("83").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- addTermAnnotation("altfluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("12").
- endSubField().
- endElement().
- startElement(7).
- startSubField("all").
- addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
- endSubField().
- startSubField("scheme").
- addUrlTokenizedString("http").
- endSubField().
- startSubField("host").
- addUrlTokenizedString("www.flickr.com").
- endSubField().
- startSubField("port").
- addUrlTokenizedString("85").
- endSubField().
- startSubField("path").
- addUrlTokenizedString("/fluke").
- endSubField().
- startSubField("query").
- addUrlTokenizedString("ab=2").
- endSubField().
- startSubField("fragment").
- addUrlTokenizedString("13").
- endSubField().
- endElement().
- endField();
- return b.endDocument();
+ auto doc = b.make_document("id:ns:searchdocument::10");
+ StringFieldBuilder sfb(b);
+ sfb.url_mode(true);
+ WeightedSetFieldValue url_wset(b.get_data_type("WeightedSet<url>"));
+ StructFieldValue url_value(b.get_data_type("url"));
+ url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.example.com").build());
+ url_value.setValue("port", sfb.tokenize("83").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("12").build());
+ url_wset.add(url_value, 4);
+ url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build());
+ url_value.setValue("scheme", sfb.tokenize("http").build());
+ url_value.setValue("host", sfb.tokenize("www.flickr.com").build());
+ url_value.setValue("port", sfb.tokenize("85").build());
+ url_value.setValue("path", sfb.tokenize("/fluke").build());
+ url_value.setValue("query", sfb.tokenize("ab=2").build());
+ url_value.setValue("fragment", sfb.tokenize("13").build());
+ url_wset.add(url_value, 7);
+ doc->setValue("url", url_wset);
+ return doc;
}
Document::UP
-makeDoc10Empty(DocBuilder &b)
+makeDoc10Empty(EmptyDocBuilder &b)
{
- b.startDocument("id:ns:searchdocument::10");
- return b.endDocument();
+ return b.make_document("id:ns:searchdocument::10");
}
}
struct UrlFieldInverterTest : public ::testing::Test {
Schema _schema;
- DocBuilder _b;
+ EmptyDocBuilder _b;
WordStore _word_store;
FieldIndexRemover _remover;
test::OrderedFieldIndexInserterBackend _inserter_backend;
@@ -195,9 +137,10 @@ struct UrlFieldInverterTest : public ::testing::Test {
return schema;
}
- UrlFieldInverterTest(Schema::CollectionType collectionType)
+ UrlFieldInverterTest(Schema::CollectionType collectionType,
+ EmptyDocBuilder::AddFieldsType add_fields)
: _schema(makeSchema(collectionType)),
- _b(_schema),
+ _b(add_fields),
_word_store(),
_remover(_word_store),
_inserter_backend(),
@@ -250,16 +193,32 @@ struct UrlFieldInverterTest : public ::testing::Test {
UrlFieldInverterTest::~UrlFieldInverterTest() = default;
+EmptyDocBuilder::AddFieldsType
+add_single_url = [](auto& header) {
+ header.addField("url", UrlDataType::getInstance().getId()); };
+
+EmptyDocBuilder::AddFieldsType
+add_array_url = [](auto& header) {
+ using namespace document::config_builder;
+ header.addField("url", Array(UrlDataType::getInstance().getId())); };
+
+EmptyDocBuilder::AddFieldsType
+add_wset_url = [](auto& header) {
+ using namespace document::config_builder;
+ header.addField("url", Wset(UrlDataType::getInstance().getId())); };
+
+
+
struct SingleInverterTest : public UrlFieldInverterTest {
- SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE) {}
+ SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE, add_single_url) {}
};
struct ArrayInverterTest : public UrlFieldInverterTest {
- ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY) {}
+ ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY, add_array_url) {}
};
struct WeightedSetInverterTest : public UrlFieldInverterTest {
- WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET) {}
+ WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET, add_wset_url) {}
};
diff --git a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp
index 64cb6a6c146..cb9fa8522a8 100644
--- a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp
+++ b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp
@@ -58,7 +58,7 @@ public:
DirectTensorStoreTest() : store() {}
virtual ~DirectTensorStoreTest() {
- store.clearHoldLists();
+ store.reclaim_all_memory();
}
void expect_tensor(const Value* exp, EntryRef ref) {
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 7877b488065..958423860e5 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -99,10 +99,10 @@ public:
commit();
}
void commit() {
- index->transfer_hold_lists(gen_handler.getCurrentGeneration());
+ index->assign_generation(gen_handler.getCurrentGeneration());
gen_handler.incGeneration();
- gen_handler.updateFirstUsedGeneration();
- index->trim_hold_lists(gen_handler.getFirstUsedGeneration());
+ gen_handler.update_oldest_used_generation();
+ index->reclaim_memory(gen_handler.get_oldest_used_generation());
}
void set_filter(std::vector<uint32_t> docids) {
uint32_t sz = 10;
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index d559fa592ad..47812c2a63c 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -267,10 +267,10 @@ public:
ASSERT_EQ(r.get(), nullptr);
}
void commit(uint32_t docid) {
- index->transfer_hold_lists(gen_handler.getCurrentGeneration());
+ index->assign_generation(gen_handler.getCurrentGeneration());
gen_handler.incGeneration();
- gen_handler.updateFirstUsedGeneration();
- index->trim_hold_lists(gen_handler.getFirstUsedGeneration());
+ gen_handler.update_oldest_used_generation();
+ index->reclaim_memory(gen_handler.get_oldest_used_generation());
std::lock_guard<std::mutex> guard(in_progress_lock);
in_progress->clearBit(docid);
// printf("commit: %u\n", docid);