// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using document::DataType; using document::Document; using document::FieldValue; using document::StringFieldValue; using search::DocumentIdT; using search::FlushToken; using search::TuneFileIndexing; using search::TuneFileSearch; using search::diskindex::DiskIndex; using search::diskindex::IndexBuilder; using search::diskindex::SelectorArray; using search::docsummary::DocumentSummary; using search::fef::FieldPositionsIterator; using search::fef::MatchData; using search::fef::MatchDataLayout; using search::fef::TermFieldHandle; using search::fef::TermFieldMatchData; using search::index::DummyFileHeaderContext; using search::index::Schema; using search::index::test::MockFieldLengthInspector; using search::memoryindex::MemoryIndex; using search::query::SimpleStringTerm; using search::queryeval::Blueprint; using search::queryeval::FakeRequestContext; using search::queryeval::FieldSpec; using search::queryeval::FieldSpecList; using search::queryeval::SearchIterator; using search::queryeval::Searchable; using search::test::DocBuilder; using search::test::SchemaBuilder; using search::test::StringFieldBuilder; using std::ostringstream; using vespalib::string; namespace { void commit_memory_index_and_wait(MemoryIndex &memory_index) { vespalib::Gate gate; memory_index.commit(std::make_shared(gate)); gate.await(); } const string field_name = "string_field"; const string noise = "noise"; const string word1 = "foo"; const string word2 = "bar"; const DocumentIdT doc_id1 = 1; const DocumentIdT doc_id2 = 2; Document::UP buildDocument(DocBuilder & doc_builder, int id, const string &word) { ostringstream ost; ost << "id:ns:searchdocument::" << id; auto doc = doc_builder.make_document(ost.str()); doc->setValue(field_name, StringFieldBuilder(doc_builder).word(noise).space().word(word).build()); return doc; } // Performs a search using a Searchable. void testSearch(Searchable &source, const string &term, uint32_t doc_id) { FakeRequestContext requestContext; uint32_t fieldId = 0; MatchDataLayout mdl; TermFieldHandle handle = mdl.allocTermField(fieldId); MatchData::UP match_data = mdl.createMatchData(); SimpleStringTerm node(term, field_name, 0, search::query::Weight(0)); Blueprint::UP result = source.createBlueprint(requestContext, FieldSpecList().add(FieldSpec(field_name, 0, handle)), node); result->basic_plan(true, 1000); result->fetchPostings(search::queryeval::ExecuteInfo::FULL); SearchIterator::UP search_iterator = result->createSearch(*match_data); search_iterator->initFullRange(); ASSERT_TRUE(search_iterator.get()); ASSERT_TRUE(search_iterator->seek(doc_id)); EXPECT_EQ(doc_id, search_iterator->getDocId()); search_iterator->unpack(doc_id); FieldPositionsIterator it = match_data->resolveTermField(handle)->getIterator(); ASSERT_TRUE(it.valid()); EXPECT_EQ(1u, it.size()); EXPECT_EQ(1u, it.getPosition()); // All hits are at pos 1 in this index EXPECT_TRUE(!search_iterator->seek(doc_id + 1)); EXPECT_TRUE(search_iterator->isAtEnd()); } VESPA_THREAD_STACK_TAG(invert_executor) VESPA_THREAD_STACK_TAG(write_executor) // Creates a memory index, inserts documents, performs a few // searches, dumps the index to disk, and performs the searches // again. TEST(FeedAndSearchTest, require_that_memory_index_can_be_dumped_and_searched) { vespalib::ThreadStackExecutor sharedExecutor(2); auto indexFieldInverter = vespalib::SequencedTaskExecutor::create(invert_executor, 2); auto indexFieldWriter = vespalib::SequencedTaskExecutor::create(write_executor, 2); DocBuilder doc_builder([](auto& header) { header.addField(field_name, DataType::T_STRING); }); auto schema = SchemaBuilder(doc_builder).add_all_indexes().build(); MemoryIndex memory_index(schema, MockFieldLengthInspector(), *indexFieldInverter, *indexFieldWriter); Document::UP doc = buildDocument(doc_builder, doc_id1, word1); memory_index.insertDocument(doc_id1, *doc, {}); auto doc2 = buildDocument(doc_builder, doc_id2, word2); memory_index.insertDocument(doc_id2, *doc2, {}); commit_memory_index_and_wait(memory_index); testSearch(memory_index, word1, doc_id1); testSearch(memory_index, word2, doc_id2); const string index_dir = "test_index"; const uint32_t docIdLimit = memory_index.getDocIdLimit(); const uint64_t num_words = memory_index.getNumWords(); search::TuneFileIndexing tuneFileIndexing; DummyFileHeaderContext fileHeaderContext; { MockFieldLengthInspector fieldLengthInspector; IndexBuilder index_builder(schema, index_dir, docIdLimit, num_words, fieldLengthInspector, tuneFileIndexing, fileHeaderContext); memory_index.dump(index_builder); } // Fusion test. Keep all documents to get an "indentical" copy. const string index_dir2 = "test_index2"; std::vector fusionInputs; fusionInputs.push_back(index_dir); uint32_t fusionDocIdLimit = 0; using Fusion = search::diskindex::Fusion; bool fret1 = DocumentSummary::readDocIdLimit(index_dir, fusionDocIdLimit); ASSERT_TRUE(fret1); SelectorArray selector(fusionDocIdLimit, 0); { Fusion fusion(schema, index_dir2, fusionInputs, selector, tuneFileIndexing, fileHeaderContext); bool fret2 = fusion.merge(sharedExecutor, std::make_shared()); ASSERT_TRUE(fret2); } // Fusion test with all docs removed in output (doesn't affect word list) const string index_dir3 = "test_index3"; fusionInputs.clear(); fusionInputs.push_back(index_dir); fusionDocIdLimit = 0; bool fret3 = DocumentSummary::readDocIdLimit(index_dir, fusionDocIdLimit); ASSERT_TRUE(fret3); SelectorArray selector2(fusionDocIdLimit, 1); { Fusion fusion(schema, index_dir3, fusionInputs, selector2, tuneFileIndexing, fileHeaderContext); bool fret4 = fusion.merge(sharedExecutor, std::make_shared()); ASSERT_TRUE(fret4); } // Fusion test with all docs removed in input (affects word list) const string index_dir4 = "test_index4"; fusionInputs.clear(); fusionInputs.push_back(index_dir3); fusionDocIdLimit = 0; bool fret5 = DocumentSummary::readDocIdLimit(index_dir3, fusionDocIdLimit); ASSERT_TRUE(fret5); SelectorArray selector3(fusionDocIdLimit, 0); { Fusion fusion(schema, index_dir4, fusionInputs, selector3, tuneFileIndexing, fileHeaderContext); bool fret6 = fusion.merge(sharedExecutor, std::make_shared()); ASSERT_TRUE(fret6); } DiskIndex disk_index(index_dir); ASSERT_TRUE(disk_index.setup(TuneFileSearch())); testSearch(disk_index, word1, doc_id1); testSearch(disk_index, word2, doc_id2); DiskIndex disk_index2(index_dir2); ASSERT_TRUE(disk_index2.setup(TuneFileSearch())); testSearch(disk_index2, word1, doc_id1); testSearch(disk_index2, word2, doc_id2); } } // namespace int main(int argc, char* argv[]) { ::testing::InitGoogleTest(&argc, argv); if (argc > 0) { DummyFileHeaderContext::setCreator(argv[0]); } return RUN_ALL_TESTS(); }