diff options
Diffstat (limited to 'searchlib/src/tests')
26 files changed, 672 insertions, 1103 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp index 7f0a88c9f86..3fa74b78d2a 100644 --- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -38,8 +38,7 @@ public: generation_t getGen() const { return getCurrentGeneration(); } uint32_t getRefCount(generation_t gen) const { return getGenerationRefCount(gen); } void incGen() { incGeneration(); } - void updateFirstUsedGen() { updateFirstUsedGeneration(); } - generation_t getFirstUsedGen() const { return getFirstUsedGeneration(); } + generation_t oldest_used_gen() const { return get_oldest_used_generation(); } }; @@ -49,35 +48,35 @@ TEST("Test attribute guards") TestAttribute * v = static_cast<TestAttribute *> (vec.get()); EXPECT_EQUAL(v->getGen(), unsigned(0)); EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); { AttributeGuard g0(vec); EXPECT_EQUAL(v->getGen(), unsigned(0)); EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); { AttributeGuard g1(vec); EXPECT_EQUAL(v->getGen(), unsigned(0)); EXPECT_EQUAL(v->getRefCount(0), unsigned(2)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); v->incGen(); EXPECT_EQUAL(v->getGen(), unsigned(1)); EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); { AttributeGuard g0(vec); EXPECT_EQUAL(v->getGen(), unsigned(1)); EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); { v->incGen(); AttributeGuard g1(vec); @@ -85,19 +84,19 @@ TEST("Test attribute guards") EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); EXPECT_EQUAL(v->getRefCount(2), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); - v->updateFirstUsedGeneration(); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); + v->update_oldest_used_generation(); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(2)); EXPECT_EQUAL(v->getGen(), unsigned(2)); } diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp index e27065f1c25..b89a3827cc2 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp @@ -135,7 +135,7 @@ DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest() _postings.clear(tree); } _postings.clearBuilder(); - _postings.clearHoldLists(); + _postings.reclaim_all_memory(); inc_generation(); } @@ -143,10 +143,10 @@ void DocumentWeightOrFilterSearchTest::inc_generation() { _postings.freeze(); - _postings.transferHoldLists(_gens.getCurrentGeneration()); + _postings.assign_generation(_gens.getCurrentGeneration()); _gens.incGeneration(); - _gens.updateFirstUsedGeneration(); - _postings.trimHoldLists(_gens.getFirstUsedGeneration()); + _gens.update_oldest_used_generation(); + _postings.reclaim_memory(_gens.get_oldest_used_generation()); } TEST_F(DocumentWeightOrFilterSearchTest, daat_or) diff --git a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp index 1d76473754f..9d717202551 100644 --- a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp +++ b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp @@ -147,9 +147,9 @@ TEST("requireThatComparatorWithTreeIsWorking") EXPECT_EQUAL(101, exp); t.clear(m); m.freeze(); - m.transferHoldLists(g.getCurrentGeneration()); + m.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - m.trimHoldLists(g.getFirstUsedGeneration()); + m.reclaim_memory(g.get_oldest_used_generation()); } TEST("requireThatFoldedLessIsWorking") diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 02ff01043b0..0542a253cc5 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -345,8 +345,8 @@ TEST(EnumStoreTest, test_hold_lists_and_generation) // check readers again checkReaders(ses, readers); - ses.transfer_hold_lists(sesGen); - ses.trim_hold_lists(sesGen + 1); + ses.assign_generation(sesGen); + ses.reclaim_memory(sesGen + 1); } void @@ -357,8 +357,8 @@ dec_ref_count(NumericEnumStore& store, NumericEnumStore::Index idx) updater.commit(); generation_t gen = 5; - store.transfer_hold_lists(gen); - store.trim_hold_lists(gen + 1); + store.assign_generation(gen); + store.reclaim_memory(gen + 1); } TEST(EnumStoreTest, address_space_usage_is_reported) @@ -882,9 +882,9 @@ namespace { void inc_generation(generation_t &gen, NumericEnumStore &store) { store.freeze_dictionary(); - store.transfer_hold_lists(gen); + store.assign_generation(gen); ++gen; - store.trim_hold_lists(gen); + store.reclaim_memory(gen); } } diff --git a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp index b9f3c23213e..0d2ce048111 100644 --- a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp +++ b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp @@ -73,14 +73,14 @@ TEST_F("makeReadGuard(false) acquires guards on both target and reference attrib EXPECT_EQUAL(2u, f.target_attr->getCurrentGeneration()); EXPECT_EQUAL(2u, f.reference_attr->getCurrentGeneration()); // Should still be holding guard for first generation of writes for both attributes - EXPECT_EQUAL(1u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(1u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(1u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(1u, f.reference_attr->get_oldest_used_generation()); } // Force a generation handler update add_n_docs_with_undefined_values(*f.reference_attr, 1); add_n_docs_with_undefined_values(*f.target_attr, 1); - EXPECT_EQUAL(3u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(3u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(3u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(3u, f.reference_attr->get_oldest_used_generation()); } TEST_F("makeReadGuard(true) acquires enum guard on target and regular guard on reference attribute", Fixture) { @@ -95,15 +95,15 @@ TEST_F("makeReadGuard(true) acquires enum guard on target and regular guard on r EXPECT_EQUAL(5u, f.target_attr->getCurrentGeneration()); EXPECT_EQUAL(2u, f.reference_attr->getCurrentGeneration()); - EXPECT_EQUAL(3u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(1u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(3u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(1u, f.reference_attr->get_oldest_used_generation()); EXPECT_TRUE(has_active_enum_guards(*f.target_attr)); } // Force a generation handler update add_n_docs_with_undefined_values(*f.reference_attr, 1); add_n_docs_with_undefined_values(*f.target_attr, 1); - EXPECT_EQUAL(7u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(3u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(7u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(3u, f.reference_attr->get_oldest_used_generation()); EXPECT_FALSE(has_active_enum_guards(*f.target_attr)); } diff --git a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp index 735ebcff6cf..8b8f4d2c4d4 100644 --- a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp +++ b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp @@ -41,11 +41,11 @@ class MyAttribute : public search::NotImplementedAttribute _mvMapping.shrink(committedDocIdLimit); setNumDocs(committedDocIdLimit); } - virtual void removeOldGenerations(generation_t firstUsed) override { - _mvMapping.trimHoldLists(firstUsed); + virtual void reclaim_memory(generation_t oldest_used_gen) override { + _mvMapping.reclaim_memory(oldest_used_gen); } - virtual void onGenerationChange(generation_t generation) override { - _mvMapping.transferHoldLists(generation - 1); + virtual void before_inc_generation(generation_t current_gen) override { + _mvMapping.assign_generation(current_gen); } public: @@ -115,8 +115,8 @@ public: ConstArrayRef act = get(docId); EXPECT_EQ(exp, std::vector<EntryT>(act.cbegin(), act.cend())); } - void transferHoldLists(generation_t generation) { _mvMapping->transferHoldLists(generation); } - void trimHoldLists(generation_t firstUsed) { _mvMapping->trimHoldLists(firstUsed); } + void assign_generation(generation_t current_gen) { _mvMapping->assign_generation(current_gen); } + void reclaim_memory(generation_t oldest_used_gen) { _mvMapping->reclaim_memory(oldest_used_gen); } void addDocs(uint32_t numDocs) { for (uint32_t i = 0; i < numDocs; ++i) { uint32_t doc = 0; @@ -245,12 +245,12 @@ TEST_F(IntMappingTest, test_that_old_value_is_not_overwritten_while_held) auto old3 = get(3); assertArray({5}, old3); set(3, {7}); - transferHoldLists(10); + assign_generation(10); assertArray({5}, old3); assertGet(3, {7}); - trimHoldLists(10); + reclaim_memory(10); assertArray({5}, old3); - trimHoldLists(11); + reclaim_memory(11); assertArray({0}, old3); } diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp index 36babec6a89..75e7faf0227 100644 --- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp +++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp @@ -64,11 +64,11 @@ protected: { _value_store.freeze_dictionary(); _store.freeze(); - _value_store.transfer_hold_lists(_gen_handler.getCurrentGeneration()); - _store.transferHoldLists(_gen_handler.getCurrentGeneration()); + _value_store.assign_generation(_gen_handler.getCurrentGeneration()); + _store.assign_generation(_gen_handler.getCurrentGeneration()); _gen_handler.incGeneration(); - _value_store.trim_hold_lists(_gen_handler.getFirstUsedGeneration()); - _store.trimHoldLists(_gen_handler.getFirstUsedGeneration()); + _value_store.reclaim_memory(_gen_handler.get_oldest_used_generation()); + _store.reclaim_memory(_gen_handler.get_oldest_used_generation()); } EntryRef add_sequence(int start_key, int end_key) diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp index 54efb3261c8..1eed3a015e1 100644 --- a/searchlib/src/tests/attribute/postinglist/postinglist.cpp +++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp @@ -201,7 +201,7 @@ private: PostingListNodeAllocator &postingsAlloc); void - removeOldGenerations(Tree &tree, + reclaim_memory(Tree &tree, ValueHandle &valueHandle, PostingList &postings, PostingListNodeAllocator &postingsAlloc); @@ -259,12 +259,12 @@ AttributePostingListTest::freeTree(bool verbose) static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); _intNodeAlloc->freeze(); _intPostings->freeze(); - _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + _intNodeAlloc->assign_generation(_handler.getCurrentGeneration()); _intPostings->clearBuilder(); - _intPostings->transferHoldLists(_handler.getCurrentGeneration()); + _intPostings->assign_generation(_handler.getCurrentGeneration()); _handler.incGeneration(); - _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration()); - _intPostings->trimHoldLists(_handler.getFirstUsedGeneration()); + _intNodeAlloc->reclaim_memory(_handler.get_oldest_used_generation()); + _intPostings->reclaim_memory(_handler.get_oldest_used_generation()); LOG(info, "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), @@ -613,9 +613,9 @@ AttributePostingListTest::doCompactEnumStore(Tree &tree, valueHandle.holdBuffer(*it); } generation_t generation = _handler.getCurrentGeneration(); - valueHandle.transferHoldLists(generation); + valueHandle.assign_generation(generation); _handler.incGeneration(); - valueHandle.trimHoldLists(_handler.getFirstUsedGeneration()); + valueHandle.reclaim_memory(_handler.get_oldest_used_generation()); LOG(info, "doCompactEnumStore done"); @@ -658,22 +658,22 @@ bumpGeneration(Tree &tree, (void) tree; (void) valueHandle; postingsAlloc.freeze(); - postingsAlloc.transferHoldLists(_handler.getCurrentGeneration()); - postings.transferHoldLists(_handler.getCurrentGeneration()); + postingsAlloc.assign_generation(_handler.getCurrentGeneration()); + postings.assign_generation(_handler.getCurrentGeneration()); _handler.incGeneration(); } void AttributePostingListTest:: -removeOldGenerations(Tree &tree, +reclaim_memory(Tree &tree, ValueHandle &valueHandle, PostingList &postings, PostingListNodeAllocator &postingsAlloc) { (void) tree; (void) valueHandle; - postingsAlloc.trimHoldLists(_handler.getFirstUsedGeneration()); - postings.trimHoldLists(_handler.getFirstUsedGeneration()); + postingsAlloc.reclaim_memory(_handler.get_oldest_used_generation()); + postings.reclaim_memory(_handler.get_oldest_used_generation()); } int @@ -689,7 +689,7 @@ AttributePostingListTest::Main() lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, _stlTree, _randomValues); _intNodeAlloc->freeze(); - _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + _intNodeAlloc->assign_generation(_handler.getCurrentGeneration()); doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore); removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, _stlTree, _randomValues); diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 222a3341ef9..9127c4b59fc 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -221,11 +221,11 @@ public: auto vector = _vectors.get_vector(docid).typify<double>(); _removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); } - void transfer_hold_lists(generation_t current_gen) override { + void assign_generation(generation_t current_gen) override { _transfer_gen = current_gen; } - void trim_hold_lists(generation_t first_used_gen) override { - _trim_gen = first_used_gen; + void reclaim_memory(generation_t oldest_used_gen) override { + _trim_gen = oldest_used_gen; } bool consider_compact(const CompactionStrategy&) override { return false; diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp index 6e60d14b8ff..8feb7b7e287 100644 --- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -1,14 +1,20 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/diskindex/fusion.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/common/flush_token.h> #include <vespa/searchlib/diskindex/diskindex.h> -#include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/diskindex/zcposoccrandread.h> #include <vespa/searchlib/fef/fieldpositionsiterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/index/schemautil.h> #include <vespa/searchlib/memoryindex/document_inverter.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> @@ -31,7 +37,10 @@ LOG_SETUP("fusion_test"); namespace search { +using document::ArrayFieldValue; using document::Document; +using document::StringFieldValue; +using document::WeightedSetFieldValue; using fef::FieldPositionsIterator; using fef::TermFieldMatchData; using fef::TermFieldMatchDataArray; @@ -110,26 +119,20 @@ toString(FieldPositionsIterator posItr, bool hasElements = false, bool hasWeight } std::unique_ptr<Document> -make_doc10(DocBuilder &b) +make_doc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - addStr("e").addStr("f").addStr("z"). - endField(); - b.startIndexField("f1"). - addStr("w").addStr("x"). - addStr("y").addStr("z"). - endField(); - b.startIndexField("f2"). - startElement(4).addStr("ax").addStr("ay").addStr("z").endElement(). - startElement(5).addStr("ax").endElement(). - endField(); - b.startIndexField("f3"). - startElement(4).addStr("wx").addStr("z").endElement(). - endField(); - - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + doc->setValue("f0", sfb.tokenize("a b c d e f z").build()); + doc->setValue("f1", sfb.tokenize("w x y z").build()); + ArrayFieldValue string_array(b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("ax ay z").build()); + string_array.add(sfb.tokenize("ax").build()); + doc->setValue("f2", string_array); + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("wx z").build(), 4); + doc->setValue("f3", string_wset); + return doc; } Schema::IndexField @@ -151,6 +154,18 @@ make_schema(bool interleaved_features) return schema; } +EmptyDocBuilder::AddFieldsType +make_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + void assert_interleaved_features(DiskIndex &d, const vespalib::string &field, const vespalib::string &term, uint32_t doc_id, uint32_t exp_num_occs, uint32_t exp_field_length) { @@ -327,7 +342,8 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire addField("f2").addField("f3"). addField("f4")); FieldIndexCollection fic(schema, MockFieldLengthInspector()); - DocBuilder b(schema); + EmptyDocBuilder b(make_add_fields()); + StringFieldBuilder sfb(b); auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2); auto pushThreads = SequencedTaskExecutor::create(push_executor, 2); DocumentInverterContext inv_context(schema, *invertThreads, *pushThreads, fic); @@ -338,19 +354,21 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire inv.invertDocument(10, *doc, {}); myPushDocument(inv); - b.startDocument("id:ns:searchdocument::11"). - startIndexField("f3"). - startElement(-27).addStr("zz").endElement(). - endField(); - doc = b.endDocument(); + doc = b.make_document("id:ns:searchdocument::11"); + { + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.word("zz").build(), -27); + doc->setValue("f3", string_wset); + } inv.invertDocument(11, *doc, {}); myPushDocument(inv); - b.startDocument("id:ns:searchdocument::12"). - startIndexField("f3"). - startElement(0).addStr("zz0").endElement(). - endField(); - doc = b.endDocument(); + doc = b.make_document("id:ns:searchdocument::12"); + { + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.word("zz0").build(), 0); + doc->setValue("f3", string_wset); + } inv.invertDocument(12, *doc, {}); myPushDocument(inv); @@ -468,7 +486,7 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng FieldIndexCollection fic(_schema, field_length_inspector); uint32_t numDocs = 20; uint32_t numWords = 1000; - DocBuilder b(_schema); + EmptyDocBuilder b(make_add_fields()); auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2); auto pushThreads = SequencedTaskExecutor::create(push_executor, 2); DocumentInverterContext inv_context(_schema, *invertThreads, *pushThreads, fic); diff --git a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore deleted file mode 100644 index 999644fce87..00000000000 --- a/searchlib/src/tests/index/docbuilder/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*_test -.depend -Makefile -docbuilder_test -searchlib_docbuilder_test_app diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt deleted file mode 100644 index 7a969f602ea..00000000000 --- a/searchlib/src/tests/index/docbuilder/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_docbuilder_test_app TEST - SOURCES - docbuilder_test.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app) diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp deleted file mode 100644 index f76b61dcb78..00000000000 --- a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/log/log.h> -LOG_SETUP("docbuilder_test"); -#include <boost/algorithm/string/classification.hpp> -#include <boost/algorithm/string/split.hpp> -#include <vespa/searchlib/index/docbuilder.h> -#include <vespa/vespalib/encoding/base64.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/document/repo/fixedtyperepo.h> -#include <iostream> - -using namespace document; -using search::index::schema::CollectionType; - -namespace search::index { - -namespace -{ -std::string empty; -} - -namespace linguistics -{ -const vespalib::string SPANTREE_NAME("linguistics"); -} - - -TEST("test docBuilder") -{ - Schema s; - s.addIndexField(Schema::IndexField("ia", schema::DataType::STRING)); - s.addIndexField(Schema::IndexField("ib", schema::DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("ic", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addUriIndexFields(Schema::IndexField("iu", schema::DataType::STRING)); - s.addUriIndexFields(Schema::IndexField("iau", schema::DataType::STRING, CollectionType::ARRAY)); - s.addUriIndexFields(Schema::IndexField("iwu", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("aa", schema::DataType::INT32)); - s.addAttributeField(Schema::AttributeField("ab", schema::DataType::FLOAT)); - s.addAttributeField(Schema::AttributeField("ac", schema::DataType::STRING)); - s.addAttributeField(Schema::AttributeField("ad", schema::DataType::INT32, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("ae", schema::DataType::FLOAT, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("af", schema::DataType::STRING, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("ag", schema::DataType::INT32, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("ah", schema::DataType::FLOAT, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("ai", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("asp1", schema::DataType::INT32)); - s.addAttributeField(Schema::AttributeField("asp2", schema::DataType::INT64)); - s.addAttributeField(Schema::AttributeField("aap1", schema::DataType::INT32, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("aap2", schema::DataType::INT64, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("awp1", schema::DataType::INT32, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("awp2", schema::DataType::INT64, CollectionType::WEIGHTEDSET)); - - DocBuilder b(s); - Document::UP doc; - std::vector<std::string> lines; - std::vector<std::string>::const_iterator itr; - std::string xml; - - { // empty - doc = b.startDocument("id:ns:searchdocument::0").endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::0\"/>", *itr++); - EXPECT_EQUAL("", *itr++); - EXPECT_TRUE(itr == lines.end()); - } - { // all fields set - std::vector<char> binaryBlob; - binaryBlob.push_back('\0'); - binaryBlob.push_back('\2'); - binaryBlob.push_back('\1'); - std::string raw1s("Single Raw Element"); - std::string raw1a0("Array Raw Element 0"); - std::string raw1a1("Array Raw Element 1"); - std::string raw1w0("Weighted Set Raw Element 0"); - std::string raw1w1("Weighted Set Raw Element 1"); - raw1s += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1a0 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1a1 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1w0 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1w1 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - b.startDocument("id:ns:searchdocument::1"); - b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField(); - b.startIndexField("ib").startElement().addStr("foo").endElement(). - startElement(1).addStr("bar").addStr("baz").endElement().endField(); - b. startIndexField("ic"). - startElement(20).addStr("bar").addStr("baz").endElement(). - startElement().addStr("foo").endElement(). - endField(); - b.startIndexField("iu"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - b.startIndexField("iau"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - b.startIndexField("iwu"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - b.startAttributeField("aa").addInt(2147483647).endField(); - b.startAttributeField("ab").addFloat(1234.56).endField(); - b.startAttributeField("ac").addStr("foo baz").endField(); - b.startAttributeField("ad").startElement().addInt(10).endElement().endField(); - b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField(); - b.startAttributeField("af").startElement().addStr("foo").endElement().endField(); - b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField(); - b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField(); - b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField(); - b.startAttributeField("asp1").addInt(1001).endField(); - b.startAttributeField("asp2").addPosition(1002, 1003).endField(); - b.startAttributeField("aap1"). - startElement().addInt(1004).endElement(). - startElement().addInt(1005).endElement(). - endField(); - b.startAttributeField("aap2"). - startElement().addPosition(1006, 1007).endElement(). - startElement().addPosition(1008, 1009).endElement(). - endField(); - b.startAttributeField("awp1"). - startElement(41).addInt(1010).endElement(). - startElement(42).addInt(1011).endElement(). - endField(); - b.startAttributeField("awp2"). - startElement(43).addPosition(1012, 1013).endElement(). - startElement(44).addPosition(1014, 1015).endElement(). - endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::1\">", *itr++); - EXPECT_EQUAL("<iu>", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:81/fluke?ab=2#4</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>81</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>4</fragment>", *itr++); - EXPECT_EQUAL("</iu>", *itr++); - EXPECT_EQUAL("<aa>2147483647</aa>", *itr++); - EXPECT_EQUAL("<aap2>", *itr++); - EXPECT_EQUAL("<item>1047806</item>", *itr++); - EXPECT_EQUAL("<item>1048322</item>", *itr++); - EXPECT_EQUAL("</aap2>", *itr++); - EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++); - EXPECT_EQUAL("<ae>", *itr++); - EXPECT_EQUAL("<item>10.5</item>", *itr++); - EXPECT_EQUAL("</ae>", *itr++); - EXPECT_EQUAL("<ib>", *itr++); - EXPECT_EQUAL("<item>foo</item>", *itr++); - EXPECT_EQUAL("<item>bar baz</item>", *itr++); - EXPECT_EQUAL("</ib>", *itr++); - EXPECT_EQUAL("<ah>", *itr++); - EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++); - EXPECT_EQUAL("</ah>", *itr++); - EXPECT_EQUAL("<ic>", *itr++); - EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++); - EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++); - EXPECT_EQUAL("</ic>", *itr++); - EXPECT_EQUAL("<ac>foo baz</ac>", *itr++); - EXPECT_EQUAL("<awp2>", *itr++); - EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++); - EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++); - EXPECT_EQUAL("</awp2>", *itr++); - EXPECT_EQUAL("<iau>", *itr++); - EXPECT_EQUAL("<item>", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:82/fluke?ab=2#8</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>82</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>8</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("<item>", *itr++); - EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++); - EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>82</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>9</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("</iau>", *itr++); - EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++); - EXPECT_EQUAL("<ai>", *itr++); - EXPECT_EQUAL("<item weight=\"4\">bar</item>", *itr++); - EXPECT_EQUAL("</ai>", *itr++); - EXPECT_EQUAL("<asp1>1001</asp1>", *itr++); - EXPECT_EQUAL("<ad>", *itr++); - EXPECT_EQUAL("<item>10</item>", *itr++); - EXPECT_EQUAL("</ad>", *itr++); - EXPECT_EQUAL("<iwu>", *itr++); - EXPECT_EQUAL("<item weight=\"4\">", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:83/fluke?ab=2#12</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>83</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>12</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("<item weight=\"7\">", *itr++); - EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++); - EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>85</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>13</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("</iwu>", *itr++); - EXPECT_EQUAL("<ab>1234.56</ab>", *itr++); - EXPECT_EQUAL("<ag>", *itr++); - EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++); - EXPECT_EQUAL("</ag>", *itr++); - EXPECT_EQUAL("<awp1>", *itr++); - EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++); - EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++); - EXPECT_EQUAL("</awp1>", *itr++); - EXPECT_EQUAL("<aap1>", *itr++); - EXPECT_EQUAL("<item>1004</item>", *itr++); - EXPECT_EQUAL("<item>1005</item>", *itr++); - EXPECT_EQUAL("</aap1>", *itr++); - EXPECT_EQUAL("<af>", *itr++); - EXPECT_EQUAL("<item>foo</item>", *itr++); - EXPECT_EQUAL("</af>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); -#if 0 - std::cout << "onedoc xml start -----" << std::endl << - xml << std::endl << - "-------" << std::endl; - std::cout << "onedoc toString start ----" << std::endl << - doc->toString(true) << std::endl << - "-------" << std::endl; -#endif - } - { // create one more to see that everything is cleared - b.startDocument("id:ns:searchdocument::2"); - b.startIndexField("ia").addStr("yes").endField(); - b.startAttributeField("aa").addInt(20).endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::2\">", *itr++); - EXPECT_EQUAL("<aa>20</aa>", *itr++); - EXPECT_EQUAL("<ia>yes</ia>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); - } - { // create field with cjk chars - b.startDocument("id:ns:searchdocument::3"); - b.startIndexField("ia"). - addStr("我就是那个"). - setAutoSpace(false). - addStr("大灰狼"). - setAutoSpace(true). - endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::3\">", *itr++); - EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); - const FieldValue::UP iaval = doc->getValue("ia"); - ASSERT_TRUE(iaval.get() != NULL); - const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *> - (iaval.get()); - ASSERT_TRUE(iasval != NULL); - StringFieldValue::SpanTrees trees = iasval->getSpanTrees(); - const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME); - ASSERT_TRUE(tree != NULL); - std::vector<Span> spans; - std::vector<Span> expSpans; - for (SpanTree::const_iterator i = tree->begin(), ie = tree->end(); - i != ie; ++i) { - Annotation &ann = const_cast<Annotation &>(*i); - const Span *span = dynamic_cast<const Span *>(ann.getSpanNode()); - if (span == NULL) - continue; - spans.push_back(*span); - } - expSpans.push_back(Span(0, 15)); - expSpans.push_back(Span(0, 15)); - expSpans.push_back(Span(15, 9)); - expSpans.push_back(Span(15, 9)); - ASSERT_TRUE(expSpans == spans); -#if 0 - std::cout << "onedoc xml start -----" << std::endl << - xml << std::endl << - "-------" << std::endl; - std::cout << "onedoc toString start ----" << std::endl << - doc->toString(true) << std::endl << - "-------" << std::endl; -#endif - } -} - -TEST("test if index names are valid uri parts") { - EXPECT_FALSE(UriField::mightBePartofUri("all")); - EXPECT_FALSE(UriField::mightBePartofUri("fragment")); - EXPECT_FALSE(UriField::mightBePartofUri(".all")); - EXPECT_FALSE(UriField::mightBePartofUri("all.b")); - EXPECT_TRUE(UriField::mightBePartofUri("b.all")); - EXPECT_TRUE(UriField::mightBePartofUri("b.scheme")); - EXPECT_TRUE(UriField::mightBePartofUri("b.host")); - EXPECT_TRUE(UriField::mightBePartofUri("b.port")); - EXPECT_TRUE(UriField::mightBePartofUri("b.hostname")); - EXPECT_TRUE(UriField::mightBePartofUri("b.path")); - EXPECT_TRUE(UriField::mightBePartofUri("b.query")); - EXPECT_TRUE(UriField::mightBePartofUri("b.fragment")); -} - -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore deleted file mode 100644 index f15be1efcfe..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*_test -.depend -Makefile -doctypebuilder_test -searchlib_doctypebuilder_test_app diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt deleted file mode 100644 index 348ecde5a7c..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_doctypebuilder_test_app TEST - SOURCES - doctypebuilder_test.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app) diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp deleted file mode 100644 index 95854fa11b2..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/searchlib/index/doctypebuilder.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/vespalib/testkit/testapp.h> - -using namespace document; - -namespace search { -namespace index { - -using schema::CollectionType; -using schema::DataType; - -TEST("testSearchDocType") { - Schema s; - s.addIndexField(Schema::IndexField("ia", DataType::STRING)); - s.addIndexField(Schema::IndexField("ib", DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("ic", DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addUriIndexFields(Schema::IndexField("iu", DataType::STRING)); - s.addUriIndexFields(Schema::IndexField("iau", DataType::STRING, CollectionType::ARRAY)); - s.addUriIndexFields(Schema::IndexField("iwu", DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("aa", DataType::INT32)); - s.addAttributeField(Schema::AttributeField("spos", DataType::INT64)); - s.addAttributeField(Schema::AttributeField("apos", DataType::INT64, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("wpos", DataType::INT64, CollectionType::WEIGHTEDSET)); - - DocTypeBuilder docTypeBuilder(s); - document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig(); - DocumentTypeRepo repo(config); - const DocumentType *docType = repo.getDocumentType("searchdocument"); - ASSERT_TRUE(docType); - EXPECT_EQUAL(10u, docType->getFieldCount()); - - EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName()); - EXPECT_EQUAL("Array<String>", - docType->getField("ib").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<String>", - docType->getField("ic").getDataType().getName()); - EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName()); - EXPECT_EQUAL("Array<url>", - docType->getField("iau").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<url>", - docType->getField("iwu").getDataType().getName()); - - EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName()); - EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName()); - EXPECT_EQUAL("Array<Long>", - docType->getField("apos").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<Long>", - docType->getField("wpos").getDataType().getName()); -} - -TEST("require that multiple fields can have the same type") { - Schema s; - s.addIndexField(Schema::IndexField("array1", DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("array2", DataType::STRING, CollectionType::ARRAY)); - DocTypeBuilder docTypeBuilder(s); - document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig(); - DocumentTypeRepo repo(config); - const DocumentType *docType = repo.getDocumentType("searchdocument"); - ASSERT_TRUE(docType); - EXPECT_EQUAL(2u, docType->getFieldCount()); - - EXPECT_EQUAL("Array<String>", - docType->getField("array1").getDataType().getName()); - EXPECT_EQUAL("Array<String>", - docType->getField("array2").getDataType().getName()); -} - -} // namespace index -} // namespace search - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/index/string_field_builder/CMakeLists.txt b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt new file mode 100644 index 00000000000..f8774eae5ca --- /dev/null +++ b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_string_field_builder_test_app TEST + SOURCES + string_field_builder_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_string_field_builder_test_app COMMAND searchlib_string_field_builder_test_app) diff --git a/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp new file mode 100644 index 00000000000..8c2b641f724 --- /dev/null +++ b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp @@ -0,0 +1,141 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/index/string_field_builder.h> +#include <vespa/document/annotation/annotation.h> +#include <vespa/document/annotation/span.h> +#include <vespa/document/annotation/spanlist.h> +#include <vespa/document/annotation/spantree.h> +#include <vespa/document/datatype/annotationtype.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/index/empty_doc_builder.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <cassert> +#include <iostream> + +using document::Annotation; +using document::AnnotationType; +using document::Span; +using document::SpanNode; +using document::SpanTree; +using document::StringFieldValue; +using search::index::EmptyDocBuilder; +using search::index::StringFieldBuilder; + +namespace +{ + +const vespalib::string SPANTREE_NAME("linguistics"); + +struct MyAnnotation { + int32_t start; + int32_t length; + std::optional<vespalib::string> label; + + MyAnnotation(int32_t start_in, int32_t length_in) noexcept + : start(start_in), + length(length_in), + label() + { + } + + MyAnnotation(int32_t start_in, int32_t length_in, vespalib::string label_in) noexcept + : start(start_in), + length(length_in), + label(label_in) + { + } + + bool operator==(const MyAnnotation& rhs) const noexcept; +}; + +bool +MyAnnotation::operator==(const MyAnnotation& rhs) const noexcept +{ + return start == rhs.start && + length == rhs.length && + label == rhs.label; +} + + +std::ostream& operator<<(std::ostream& os, const MyAnnotation& ann) { + os << "[" << ann.start << "," << ann.length << "]"; + if (ann.label.has_value()) { + os << "(\"" << ann.label.value() << "\")"; + } + return os; +} + +} + +class StringFieldBuilderTest : public testing::Test +{ +protected: + EmptyDocBuilder edb; + StringFieldBuilder sfb; + StringFieldBuilderTest(); + ~StringFieldBuilderTest(); + std::vector<MyAnnotation> get_annotations(const StringFieldValue& val); + void assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val); +}; + +StringFieldBuilderTest::StringFieldBuilderTest() + : testing::Test(), + edb(), + sfb(edb) +{ +} + +StringFieldBuilderTest::~StringFieldBuilderTest() = default; + +std::vector<MyAnnotation> +StringFieldBuilderTest::get_annotations(const StringFieldValue& val) +{ + std::vector<MyAnnotation> result; + StringFieldValue::SpanTrees trees = val.getSpanTrees(); + const auto* tree = StringFieldValue::findTree(trees, SPANTREE_NAME); + if (tree != nullptr) { + for (auto& ann : *tree) { + assert(ann.getType() == *AnnotationType::TERM); + auto span = dynamic_cast<const Span *>(ann.getSpanNode()); + if (span == nullptr) { + continue; + } + auto ann_fv = ann.getFieldValue(); + if (ann_fv == nullptr) { + result.emplace_back(span->from(), span->length()); + } else { + result.emplace_back(span->from(), span->length(), dynamic_cast<const StringFieldValue &>(*ann_fv).getValue()); + } + } + } + return result; +} + +void +StringFieldBuilderTest::assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val) +{ + EXPECT_EQ(exp, get_annotations(val)); + EXPECT_EQ(plain, val.getValue()); +} + +TEST_F(StringFieldBuilderTest, no_annotations) +{ + assert_annotations({}, "foo", StringFieldValue("foo")); +} + +TEST_F(StringFieldBuilderTest, single_word) +{ + assert_annotations({{0, 4}}, "word", sfb.word("word").build()); +} + +TEST_F(StringFieldBuilderTest, tokenize) +{ + assert_annotations({{0, 4}, {5, 2}, {8, 1}, {10, 4}}, "this is a test", sfb.tokenize("this is a test").build()); +} + +TEST_F(StringFieldBuilderTest, alt_word) +{ + assert_annotations({{0, 3}, {4, 3}, {4, 3, "baz"}}, "foo bar", sfb.word("foo").space().word("bar").alt_word("baz").build()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp index 3f8a04d9460..83746b611fb 100644 --- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp @@ -1,8 +1,13 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/searchlib/index/docbuilder.h> -#include <vespa/searchlib/index/field_length_calculator.h> #include <vespa/searchlib/memoryindex/document_inverter.h> +#include <vespa/document/datatype/datatype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> +#include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> @@ -19,9 +24,10 @@ namespace search::memoryindex { using document::Document; -using index::DocBuilder; +using index::EmptyDocBuilder; using index::FieldLengthCalculator; using index::Schema; +using index::StringFieldBuilder; using index::schema::CollectionType; using index::schema::DataType; using vespalib::SequencedTaskExecutor; @@ -29,64 +35,68 @@ using vespalib::ISequencedTaskExecutor; namespace { +EmptyDocBuilder::AddFieldsType +make_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + Document::UP -makeDoc10(DocBuilder &b) +makeDoc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); + return doc; } Document::UP -makeDoc11(DocBuilder &b) +makeDoc11(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::11"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("e").addStr("f"). - endField(); - b.startIndexField("f1"). - addStr("a").addStr("g"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::11"); + doc->setValue("f0", sfb.tokenize("a b e f").build()); + doc->setValue("f1", sfb.tokenize("a g").build()); + return doc; } Document::UP -makeDoc12(DocBuilder &b) +makeDoc12(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::12"); - b.startIndexField("f0"). - addStr("h").addStr("doc12"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::12"); + doc->setValue("f0", sfb.tokenize("h doc12").build()); + return doc; } Document::UP -makeDoc13(DocBuilder &b) +makeDoc13(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::13"); - b.startIndexField("f0"). - addStr("i").addStr("doc13"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::13"); + doc->setValue("f0", sfb.tokenize("i doc13").build()); + return doc; } Document::UP -makeDoc14(DocBuilder &b) +makeDoc14(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::14"); - b.startIndexField("f0"). - addStr("j").addStr("doc14"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::14"); + doc->setValue("f0", sfb.tokenize("j doc14").build()); + return doc; } Document::UP -makeDoc15(DocBuilder &b) +makeDoc15(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::15"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::15"); } } @@ -96,7 +106,7 @@ VESPA_THREAD_STACK_TAG(push_executor) struct DocumentInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; WordStore _word_store; @@ -118,7 +128,7 @@ struct DocumentInverterTest : public ::testing::Test { DocumentInverterTest() : _schema(makeSchema()), - _b(_schema), + _b(make_add_fields()), _invertThreads(SequencedTaskExecutor::create(invert_executor, 1)), _pushThreads(SequencedTaskExecutor::create(push_executor, 1)), _word_store(), diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index dcca1f136f6..04d1f08db6f 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -1,13 +1,22 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/document/datatype/datatype.h> +#include <vespa/document/datatype/urldatatype.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/diskindex/zcposoccrandread.h> #include <vespa/searchlib/fef/fieldpositionsiterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/docidandfeatures.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/document_inverter.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> #include <vespa/searchlib/memoryindex/field_index_collection.h> @@ -37,7 +46,11 @@ namespace search { using namespace fef; using namespace index; +using document::ArrayFieldValue; using document::Document; +using document::StructFieldValue; +using document::UrlDataType; +using document::WeightedSetFieldValue; using queryeval::RankedSearchIteratorBase; using queryeval::SearchIterator; using search::index::schema::CollectionType; @@ -505,6 +518,12 @@ make_single_field_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_single_add_fields() +{ + return [](auto& header) { header.addField("f0", document::DataType::T_STRING); }; +} + template <typename FieldIndexType> struct FieldIndexTest : public ::testing::Test { Schema schema; @@ -706,6 +725,18 @@ make_multi_field_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_multi_field_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + struct FieldIndexCollectionTest : public ::testing::Test { Schema schema; FieldIndexCollection fic; @@ -907,16 +938,16 @@ class InverterTest : public ::testing::Test { public: Schema _schema; FieldIndexCollection _fic; - DocBuilder _b; + EmptyDocBuilder _b; std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; DocumentInverterContext _inv_context; DocumentInverter _inv; - InverterTest(const Schema& schema) + InverterTest(const Schema& schema, EmptyDocBuilder::AddFieldsType add_fields) : _schema(schema), _fic(_schema, MockFieldLengthInspector()), - _b(_schema), + _b(add_fields), _invertThreads(SequencedTaskExecutor::create(invert_executor, 2)), _pushThreads(SequencedTaskExecutor::create(push_executor, 2)), _inv_context(_schema, *_invertThreads, *_pushThreads, _fic), @@ -938,91 +969,63 @@ public: class BasicInverterTest : public InverterTest { public: - BasicInverterTest() : InverterTest(make_multi_field_schema()) {} + BasicInverterTest() : InverterTest(make_multi_field_schema(), make_multi_field_add_fields()) {} }; TEST_F(BasicInverterTest, require_that_inversion_is_working) { Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::20"); - _b.startIndexField("f0"). - addStr("a").addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::20"); + doc->setValue("f0", sfb.tokenize("a a b c d").build()); _inv.invertDocument(20, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::30"); - _b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - addStr("e").addStr("f"). - endField(); - _b.startIndexField("f1"). - addStr("\nw2").addStr("w").addStr("x"). - addStr("\nw3").addStr("y").addStr("z"). - endField(); - _b.startIndexField("f2"). - startElement(4). - addStr("w").addStr("x"). - endElement(). - startElement(5). - addStr("y").addStr("z"). - endElement(). - endField(); - _b.startIndexField("f3"). - startElement(6). - addStr("w").addStr("x"). - endElement(). - startElement(7). - addStr("y").addStr("z"). - endElement(). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::30"); + doc->setValue("f0", sfb.tokenize("a b c d e f").build()); + doc->setValue("f1", sfb.word("\nw2").tokenize(" w x "). + word("\nw3").tokenize(" y z").build()); + { + ArrayFieldValue string_array(_b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("w x").build()); + string_array.add(sfb.tokenize("y z").build()); + doc->setValue("f2", string_array); + } + { + WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("w x").build(), 6); + string_wset.add(sfb.tokenize("y z").build(), 7); + doc->setValue("f3", string_wset); + } _inv.invertDocument(30, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::40"); - _b.startIndexField("f0"). - addStr("a").addStr("a").addStr("b").addStr("c").addStr("a"). - addStr("e").addStr("f"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::40"); + doc->setValue("f0", sfb.tokenize("a a b c a e f").build()); _inv.invertDocument(40, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::999"); - _b.startIndexField("f0"). - addStr("this").addStr("is").addStr("_a_").addStr("test"). - addStr("for").addStr("insertion").addStr("speed").addStr("with"). - addStr("more").addStr("than").addStr("just").addStr("__a__"). - addStr("few").addStr("words").addStr("present").addStr("in"). - addStr("some").addStr("of").addStr("the").addStr("fields"). - endField(); - _b.startIndexField("f1"). - addStr("the").addStr("other").addStr("field").addStr("also"). - addStr("has").addStr("some").addStr("content"). - endField(); - _b.startIndexField("f2"). - startElement(1). - addStr("strange").addStr("things").addStr("here"). - addStr("has").addStr("some").addStr("content"). - endElement(). - endField(); - _b.startIndexField("f3"). - startElement(3). - addStr("not").addStr("a").addStr("weighty").addStr("argument"). - endElement(). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::999"); + doc->setValue("f0", sfb.tokenize("this is ").word("_a_"). + tokenize(" test for insertion speed with more than just "). + word("__a__").tokenize(" few words present in some of the fields").build()); + doc->setValue("f1", sfb.tokenize("the other field also has some content").build()); + { + ArrayFieldValue string_array(_b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("strange things here has some content").build()); + doc->setValue("f2", string_array); + } + { + WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("not a weighty argument").build(), 3); + doc->setValue("f3", string_wset); + } for (uint32_t docId = 10000; docId < 20000; ++docId) { _inv.invertDocument(docId, *doc, {}); myPushDocument(_inv); @@ -1132,19 +1135,17 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remover) { - Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::1"); - _b.startIndexField("f0").addStr("a").addStr("b").endField(); - _b.startIndexField("f1").addStr("a").addStr("c").endField(); - Document::UP doc1 = _b.endDocument(); - _inv.invertDocument(1, *doc1.get(), {}); + auto doc1 = _b.make_document("id:ns:searchdocument::1"); + doc1->setValue("f0", sfb.tokenize("a b").build()); + doc1->setValue("f1", sfb.tokenize("a c").build()); + _inv.invertDocument(1, *doc1, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::2"); - _b.startIndexField("f0").addStr("b").addStr("c").endField(); - Document::UP doc2 = _b.endDocument(); - _inv.invertDocument(2, *doc2.get(), {}); + auto doc2 = _b.make_document("id:ns:searchdocument::2"); + doc2->setValue("f0", sfb.tokenize("b c").build()); + _inv.invertDocument(2, *doc2, {}); myPushDocument(_inv); EXPECT_TRUE(assertPostingList("[1]", find("a", 0))); @@ -1172,136 +1173,71 @@ make_uri_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_uri_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + header.addField("iu", UrlDataType::getInstance().getId()) + .addField("iau", Array(UrlDataType::getInstance().getId())) + .addField("iwu", Wset(UrlDataType::getInstance().getId())); + }; +} + class UriInverterTest : public InverterTest { public: - UriInverterTest() : InverterTest(make_uri_schema()) {} + UriInverterTest() : InverterTest(make_uri_schema(), make_uri_add_fields()) {} }; TEST_F(UriInverterTest, require_that_uri_indexing_is_working) { Document::UP doc; - - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("iu"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - _b.startIndexField("iau"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - _b.startIndexField("iwu"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - doc = _b.endDocument(); + StringFieldBuilder sfb(_b); + sfb.url_mode(true); + StructFieldValue url_value(_b.get_data_type("url")); + + doc = _b.make_document("id:ns:searchdocument::10"); + url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("81").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("4").build()); + doc->setValue("iu", url_value); + ArrayFieldValue url_array(_b.get_data_type("Array<url>")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("82").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("8").build()); + url_array.add(url_value); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("fragment", sfb.tokenize("9").build()); + url_array.add(url_value); + doc->setValue("iau", url_array); + WeightedSetFieldValue url_wset(_b.get_data_type("WeightedSet<url>")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("83").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("12").build()); + url_wset.add(url_value, 4); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("port", sfb.tokenize("85").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("13").build()); + url_wset.add(url_value, 7); + doc->setValue("iwu", url_wset); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); @@ -1360,21 +1296,16 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) class CjkInverterTest : public InverterTest { public: - CjkInverterTest() : InverterTest(make_single_field_schema()) {} + CjkInverterTest() : InverterTest(make_single_field_schema(), make_single_add_fields()) {} }; TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) { Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("f0"). - addStr("我就是那个"). - setAutoSpace(false). - addStr("大灰狼"). - setAutoSpace(true). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.word("我就是那个").word("大灰狼").build()); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp index ed049a82c42..bf3a911a579 100644 --- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp @@ -1,8 +1,14 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> #include <vespa/searchlib/memoryindex/word_store.h> @@ -13,9 +19,12 @@ namespace search { +using document::ArrayFieldValue; using document::Document; -using index::DocBuilder; +using document::WeightedSetFieldValue; +using index::EmptyDocBuilder; using index::Schema; +using index::StringFieldBuilder; using index::schema::CollectionType; using index::schema::DataType; @@ -26,93 +35,91 @@ namespace memoryindex { namespace { Document::UP -makeDoc10(DocBuilder &b) +makeDoc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); + return doc; } Document::UP -makeDoc11(DocBuilder &b) +makeDoc11(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::11"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("e").addStr("f"). - endField(); - b.startIndexField("f1"). - addStr("a").addStr("g"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::11"); + doc->setValue("f0", sfb.tokenize("a b e f").build()); + doc->setValue("f1", sfb.tokenize("a g").build()); + return doc; } Document::UP -makeDoc12(DocBuilder &b) +makeDoc12(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::12"); - b.startIndexField("f0"). - addStr("h").addStr("doc12"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::12"); + doc->setValue("f0", sfb.tokenize("h doc12").build()); + return doc; } Document::UP -makeDoc13(DocBuilder &b) +makeDoc13(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::13"); - b.startIndexField("f0"). - addStr("i").addStr("doc13"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::13"); + doc->setValue("f0", sfb.tokenize("i doc13").build()); + return doc; } Document::UP -makeDoc14(DocBuilder &b) +makeDoc14(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::14"); - b.startIndexField("f0"). - addStr("j").addStr("doc14"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::14"); + doc->setValue("f0", sfb.tokenize("j doc14").build()); + return doc; } Document::UP -makeDoc15(DocBuilder &b) +makeDoc15(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::15"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::15"); } Document::UP -makeDoc16(DocBuilder &b) +makeDoc16(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::16"); - b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz"). - addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty"). - addStr("z").endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::16"); + doc->setValue("f0", sfb.tokenize("foo bar baz").alt_word("altbaz").tokenize(" y").alt_word("alty").tokenize(" z").build()); + return doc; } Document::UP -makeDoc17(DocBuilder &b) +makeDoc17(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::17"); - b.startIndexField("f1").addStr("foo0").addStr("bar0").endField(); - b.startIndexField("f2").startElement(1).addStr("foo").addStr("bar").endElement().startElement(1).addStr("bar").endElement().endField(); - b.startIndexField("f3").startElement(3).addStr("foo2").addStr("bar2").endElement().startElement(4).addStr("bar2").endElement().endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::17"); + doc->setValue("f1", sfb.tokenize("foo0 bar0").build()); + ArrayFieldValue string_array(b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("foo bar").build()); + string_array.add(sfb.tokenize("bar").build()); + doc->setValue("f2", string_array); + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("foo2 bar2").build(), 3); + string_wset.add(sfb.tokenize("bar2").build(), 4); + doc->setValue("f3", string_wset); + return doc; } vespalib::string corruptWord = "corruptWord"; Document::UP -makeCorruptDocument(DocBuilder &b, size_t wordOffset) +makeCorruptDocument(EmptyDocBuilder &b, size_t wordOffset) { - b.startDocument("id:ns:searchdocument::18"); - b.startIndexField("f0").addStr("before").addStr(corruptWord).addStr("after").addStr("z").endField(); - auto doc = b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::18"); + doc->setValue("f0", sfb.tokenize("before ").word(corruptWord).tokenize(" after z").build()); vespalib::nbostream stream; doc->serialize(stream); std::vector<char> raw; @@ -127,14 +134,14 @@ makeCorruptDocument(DocBuilder &b, size_t wordOffset) } vespalib::nbostream badstream; badstream.write(&raw[0], raw.size()); - return std::make_unique<Document>(*b.getDocumentTypeRepo(), badstream); + return std::make_unique<Document>(b.get_repo(), badstream); } } struct FieldInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; WordStore _word_store; FieldIndexRemover _remover; test::OrderedFieldIndexInserterBackend _inserter_backend; @@ -151,9 +158,21 @@ struct FieldInverterTest : public ::testing::Test { return schema; } + static EmptyDocBuilder::AddFieldsType + make_add_fields() + { + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; + } + FieldInverterTest() : _schema(makeSchema()), - _b(_schema), + _b(make_add_fields()), _word_store(), _remover(_word_store), _inserter_backend(), diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp index b3ea948dfa7..1730e34adb5 100644 --- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp +++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp @@ -1,11 +1,15 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/common/scheduletaskcallback.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/matchdatalayout.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/i_field_length_inspector.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/memory_index.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> @@ -59,6 +63,12 @@ struct MySetup : public IFieldLengthInspector { } return FieldLengthInfo(); } + void add_fields(document::config_builder::Struct& header) const { + for (uint32_t i = 0; i < schema.getNumIndexFields(); ++i) { + auto& field = schema.getIndexField(i); + header.addField(field.getName(), document::DataType::T_STRING); + } + } }; @@ -70,31 +80,38 @@ struct Index { std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; MemoryIndex index; - DocBuilder builder; + EmptyDocBuilder builder; + StringFieldBuilder sfb; + std::unique_ptr<Document> builder_doc; uint32_t docid; std::string currentField; + bool add_space; Index(const MySetup &setup); ~Index(); void closeField() { if (!currentField.empty()) { - builder.endField(); + builder_doc->setValue(currentField, sfb.build()); currentField.clear(); } } Index &doc(uint32_t id) { docid = id; - builder.startDocument(vespalib::make_string("id:ns:searchdocument::%u", id)); + builder_doc = builder.make_document(vespalib::make_string("id:ns:searchdocument::%u", id)); return *this; } Index &field(const std::string &name) { closeField(); - builder.startIndexField(name); currentField = name; + add_space = false; return *this; } Index &add(const std::string &token) { - builder.addStr(token); + if (add_space) { + sfb.space(); + } + add_space = true; + sfb.word(token); return *this; } void internalSyncCommit() { @@ -106,7 +123,7 @@ struct Index { } Document::UP commit() { closeField(); - Document::UP d = builder.endDocument(); + Document::UP d = std::move(builder_doc); index.insertDocument(docid, *d, {}); internalSyncCommit(); return d; @@ -133,9 +150,12 @@ Index::Index(const MySetup &setup) _invertThreads(SequencedTaskExecutor::create(invert_executor, 2)), _pushThreads(SequencedTaskExecutor::create(push_executor, 2)), index(schema, setup, *_invertThreads, *_pushThreads), - builder(schema), + builder([&setup](auto& header) { setup.add_fields(header); }), + sfb(builder), + builder_doc(), docid(1), - currentField() + currentField(), + add_space(false) { } Index::~Index() = default; diff --git a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp index 969f483eef6..3995f06628c 100644 --- a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp @@ -1,11 +1,21 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/memoryindex/url_field_inverter.h> +#include <vespa/document/datatype/urldatatype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/schema_index_fields.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> -#include <vespa/searchlib/memoryindex/url_field_inverter.h> #include <vespa/searchlib/memoryindex/word_store.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter_backend.h> @@ -14,6 +24,10 @@ namespace search { using document::Document; +using document::ArrayFieldValue; +using document::StructFieldValue; +using document::UrlDataType; +using document::WeightedSetFieldValue; using index::schema::CollectionType; using index::schema::DataType; @@ -26,160 +40,88 @@ namespace { const vespalib::string url = "url"; Document::UP -makeDoc10Single(DocBuilder &b) +makeDoc10Single(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StructFieldValue url_value(b.get_data_type("url")); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("81").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("4").build()); + doc->setValue("url", url_value); + return doc; } Document::UP -makeDoc10Array(DocBuilder &b) +makeDoc10Array(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + ArrayFieldValue url_array(b.get_data_type("Array<url>")); + StructFieldValue url_value(b.get_data_type("url")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("82").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("8").build()); + url_array.add(url_value); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("fragment", sfb.tokenize("9").build()); + url_array.add(url_value); + doc->setValue("url", url_array); + return doc; } Document::UP -makeDoc10WeightedSet(DocBuilder &b) +makeDoc10WeightedSet(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + WeightedSetFieldValue url_wset(b.get_data_type("WeightedSet<url>")); + StructFieldValue url_value(b.get_data_type("url")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("83").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("12").build()); + url_wset.add(url_value, 4); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("port", sfb.tokenize("85").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("13").build()); + url_wset.add(url_value, 7); + doc->setValue("url", url_wset); + return doc; } Document::UP -makeDoc10Empty(DocBuilder &b) +makeDoc10Empty(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::10"); } } struct UrlFieldInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; WordStore _word_store; FieldIndexRemover _remover; test::OrderedFieldIndexInserterBackend _inserter_backend; @@ -195,9 +137,10 @@ struct UrlFieldInverterTest : public ::testing::Test { return schema; } - UrlFieldInverterTest(Schema::CollectionType collectionType) + UrlFieldInverterTest(Schema::CollectionType collectionType, + EmptyDocBuilder::AddFieldsType add_fields) : _schema(makeSchema(collectionType)), - _b(_schema), + _b(add_fields), _word_store(), _remover(_word_store), _inserter_backend(), @@ -250,16 +193,32 @@ struct UrlFieldInverterTest : public ::testing::Test { UrlFieldInverterTest::~UrlFieldInverterTest() = default; +EmptyDocBuilder::AddFieldsType +add_single_url = [](auto& header) { + header.addField("url", UrlDataType::getInstance().getId()); }; + +EmptyDocBuilder::AddFieldsType +add_array_url = [](auto& header) { + using namespace document::config_builder; + header.addField("url", Array(UrlDataType::getInstance().getId())); }; + +EmptyDocBuilder::AddFieldsType +add_wset_url = [](auto& header) { + using namespace document::config_builder; + header.addField("url", Wset(UrlDataType::getInstance().getId())); }; + + + struct SingleInverterTest : public UrlFieldInverterTest { - SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE) {} + SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE, add_single_url) {} }; struct ArrayInverterTest : public UrlFieldInverterTest { - ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY) {} + ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY, add_array_url) {} }; struct WeightedSetInverterTest : public UrlFieldInverterTest { - WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET) {} + WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET, add_wset_url) {} }; diff --git a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp index 64cb6a6c146..cb9fa8522a8 100644 --- a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp +++ b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp @@ -58,7 +58,7 @@ public: DirectTensorStoreTest() : store() {} virtual ~DirectTensorStoreTest() { - store.clearHoldLists(); + store.reclaim_all_memory(); } void expect_tensor(const Value* exp, EntryRef ref) { diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 7877b488065..958423860e5 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -99,10 +99,10 @@ public: commit(); } void commit() { - index->transfer_hold_lists(gen_handler.getCurrentGeneration()); + index->assign_generation(gen_handler.getCurrentGeneration()); gen_handler.incGeneration(); - gen_handler.updateFirstUsedGeneration(); - index->trim_hold_lists(gen_handler.getFirstUsedGeneration()); + gen_handler.update_oldest_used_generation(); + index->reclaim_memory(gen_handler.get_oldest_used_generation()); } void set_filter(std::vector<uint32_t> docids) { uint32_t sz = 10; diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index d559fa592ad..47812c2a63c 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -267,10 +267,10 @@ public: ASSERT_EQ(r.get(), nullptr); } void commit(uint32_t docid) { - index->transfer_hold_lists(gen_handler.getCurrentGeneration()); + index->assign_generation(gen_handler.getCurrentGeneration()); gen_handler.incGeneration(); - gen_handler.updateFirstUsedGeneration(); - index->trim_hold_lists(gen_handler.getFirstUsedGeneration()); + gen_handler.update_oldest_used_generation(); + index->reclaim_memory(gen_handler.get_oldest_used_generation()); std::lock_guard<std::mutex> guard(in_progress_lock); in_progress->clearBit(docid); // printf("commit: %u\n", docid); |