diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-07-08 11:32:06 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-07-08 12:10:06 +0000 |
commit | 2a1e136ffbe0119d254f8feeaf84ff997a7ee480 (patch) | |
tree | 0ad1f5afa72dfa0ddd2fafebcbf77a9c440653bb | |
parent | c1cb7003df193b356fd2a0ee425d6ea652077ad9 (diff) |
Add support for searching a reference attribute using the parent document id.
3 files changed, 123 insertions, 8 deletions
diff --git a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
index 757a42e5315..d7428f02ba5 100644
--- a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
+++ b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
@@ -1,9 +1,14 @@
 // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
 #include <vespa/document/base/documentid.h>
 #include <vespa/searchlib/attribute/attributeguard.h>
 #include <vespa/searchlib/attribute/reference_attribute.h>
 #include <vespa/searchlib/common/i_gid_to_lid_mapper.h>
 #include <vespa/searchlib/common/i_gid_to_lid_mapper_factory.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/query/queryterm.h>
+#include <vespa/searchlib/queryeval/fake_result.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
 #include <vespa/searchlib/test/mock_gid_to_lid_mapping.h>
 #include <vespa/vespalib/gtest/gtest.h>
 #include <vespa/vespalib/io/fileutil.h>
@@ -18,10 +23,15 @@ using document::GlobalId;
 using generation_t = vespalib::GenerationHandler::generation_t;
 using search::AttributeGuard;
 using search::AttributeVector;
+using search::QueryTermSimple;
 using search::attribute::BasicType;
 using search::attribute::Config;
 using search::attribute::Reference;
 using search::attribute::ReferenceAttribute;
+using search::attribute::SearchContextParams;
+using search::fef::TermFieldMatchData;
+using search::queryeval::FakeResult;
+using search::queryeval::SearchIterator;
 using vespalib::ArrayRef;
 using vespalib::MemoryUsage;
 
@@ -74,6 +84,8 @@ struct ReferenceAttributeTest : public ::testing::Test {
         resetAttr();
     }
 
+    ~ReferenceAttributeTest() {}
+
     AttributeVector &attr() {
         return *_attr;
     }
@@ -419,4 +431,48 @@ TEST_F(ReferenceAttributeTest, unique_gids_are_tracked)
     EXPECT_EQ(0u, getUniqueGids());
 }
 
+struct ReferenceAttributeSearchTest : public ReferenceAttributeTest {
+
+    constexpr static uint32_t doc_id_limit = 6;
+
+    ReferenceAttributeSearchTest()
+        : ReferenceAttributeTest()
+    {
+        ensureDocIdLimit(doc_id_limit);
+        set(1, toGid(doc1));
+        set(3, toGid(doc2));
+        set(4, toGid(doc1));
+        commit();
+    }
+
+    FakeResult perform_search(SearchIterator& itr) {
+        FakeResult result;
+        itr.initFullRange();
+        for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) {
+            if (itr.seek(doc_id)) {
+                result.doc(doc_id);
+            }
+        }
+        return result;
+    }
+
+    void expect_search_result(const std::string& term, const FakeResult& expected) {
+        auto ctx = _attr->getSearch(std::make_unique<QueryTermSimple>(term, QueryTermSimple::WORD),
+                                    SearchContextParams());
+        TermFieldMatchData tfmd;
+        auto itr = ctx->createIterator(&tfmd, false);
+        FakeResult actual = perform_search(*itr);
+        EXPECT_EQ(expected, actual);
+    }
+
+};
+
+TEST_F(ReferenceAttributeSearchTest, can_be_searched_by_document_id)
+{
+    expect_search_result(doc1, FakeResult().doc(1).doc(4));
+    expect_search_result(doc2, FakeResult().doc(3));
+    expect_search_result(doc3, FakeResult());
+    expect_search_result("invalid document id", FakeResult());
+}
+
 GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
index f91108d066b..82539214ea9 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
@@ -1,21 +1,28 @@
 // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
-#include "reference_attribute.h"
-#include "reference_attribute_saver.h"
 #include "attributesaver.h"
 #include "readerbase.h"
-#include <vespa/searchlib/common/i_gid_to_lid_mapper_factory.h>
+#include "reference_attribute.h"
+#include "reference_attribute_saver.h"
+#include <vespa/document/base/documentid.h>
+#include <vespa/document/base/idstringexception.h>
 #include <vespa/searchlib/common/i_gid_to_lid_mapper.h>
-#include <vespa/vespalib/datastore/unique_store_builder.h>
+#include <vespa/searchlib/common/i_gid_to_lid_mapper_factory.h>
+#include <vespa/searchlib/query/queryterm.h>
+#include <vespa/vespalib/data/fileheader.h>
 #include <vespa/vespalib/datastore/datastore.hpp>
 #include <vespa/vespalib/datastore/unique_store.hpp>
-#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/datastore/unique_store_builder.h>
 
 #include <vespa/log/log.h>
 LOG_SETUP(".searchlib.attribute.reference_attribute");
 
 namespace search::attribute {
 
+using document::DocumentId;
+using document::GlobalId;
+using document::IdParseException;
+
 namespace {
 
 // minimum dead bytes in unique store before consider compaction
@@ -265,7 +272,7 @@ ReferenceAttribute::update(DocId doc, const GlobalId &gid)
 }
 
 const Reference *
-ReferenceAttribute::getReference(DocId doc)
+ReferenceAttribute::getReference(DocId doc) const
 {
     assert(doc < _indices.size());
     EntryRef ref = _indices[doc];
@@ -411,6 +418,56 @@ ReferenceAttribute::onShrinkLidSpace()
     setNumDocs(committedDocIdLimit);
 }
 
+namespace {
+
+class ReferenceSearchContext : public AttributeVector::SearchContext {
+private:
+    const ReferenceAttribute& _ref_attr;
+    GlobalId _term;
+
+public:
+    ReferenceSearchContext(const ReferenceAttribute& ref_attr, const GlobalId& term)
+        : AttributeVector::SearchContext(ref_attr),
+          _ref_attr(ref_attr),
+          _term(term)
+    {
+    }
+    bool valid() const override {
+        return _term != GlobalId();
+    }
+    int32_t onFind(DocId docId, int32_t elementId, int32_t& weight) const override {
+        if (elementId != 0) {
+            return -1;
+        }
+        auto* ref = _ref_attr.getReference(docId);
+        if (ref == nullptr) {
+            return -1;
+        }
+        weight = 1;
+        return (_term == ref->gid()) ? 0 : -1;
+    }
+    int32_t onFind(DocId docId, int32_t elementId) const override {
+        int32_t weight;
+        return onFind(docId, elementId, weight);
+    }
+};
+
+}
+
+AttributeVector::SearchContext::UP
+ReferenceAttribute::getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams& params) const
+{
+    (void) params;
+    GlobalId gid;
+    try {
+        DocumentId docId(term->getTerm());
+        gid = docId.getGlobalId();
+    } catch (const IdParseException&) {
+        // The query term is not valid, which will result in an empty search iterator.
+    }
+    return std::make_unique<ReferenceSearchContext>(*this, gid);
+}
+
 IMPLEMENT_IDENTIFIABLE_ABSTRACT(ReferenceAttribute, AttributeVector);
 
 }
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
index 87d5a5c27bb..87d624eb21f 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
@@ -3,8 +3,8 @@
 #pragma once
 
 #include "not_implemented_attribute.h"
-#include "reference_mappings.h"
 #include "reference.h"
+#include "reference_mappings.h"
 #include <vespa/vespalib/datastore/unique_store.h>
 #include <vespa/vespalib/util/rcuvector.h>
 
@@ -71,7 +71,7 @@ public:
     bool addDoc(DocId &doc) override;
     uint32_t clearDoc(DocId doc) override;
     void update(DocId doc, const GlobalId &gid);
-    const Reference *getReference(DocId doc);
+    const Reference *getReference(DocId doc) const;
     void setGidToLidMapperFactory(std::shared_ptr<IGidToLidMapperFactory> gidToLidMapperFactory);
     std::shared_ptr<IGidToLidMapperFactory> getGidToLidMapperFactory() const { return _gidToLidMapperFactory; }
     TargetLids getTargetLids() const { return _referenceMappings.getTargetLids(); }
@@ -91,6 +91,8 @@ public:
     foreach_lid(uint32_t targetLid, FunctionType &&func) const {
         _referenceMappings.foreach_lid(targetLid, std::forward<FunctionType>(func));
     }
+
+    SearchContext::UP getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams& params) const override;
 };
 
 }