summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Geir Storli <geirst@verizonmedia.com> 2019-07-08 11:32:06 +0000
committer: Geir Storli <geirst@verizonmedia.com> 2019-07-08 12:10:06 +0000
commit: 2a1e136ffbe0119d254f8feeaf84ff997a7ee480 (patch)
tree: 0ad1f5afa72dfa0ddd2fafebcbf77a9c440653bb
parent: c1cb7003df193b356fd2a0ee425d6ea652077ad9 (diff)
Add support for searching a reference attribute using the parent document id.
-rw-r--r-- searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp 56
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp 69
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute.h 6
3 files changed, 123 insertions, 8 deletions
diff --git a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
index 757a42e5315..d7428f02ba5 100644
--- a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
+++ b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
@@ -1,9 +1,14 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
#include <vespa/document/base/documentid.h>
#include <vespa/searchlib/attribute/attributeguard.h>
#include <vespa/searchlib/attribute/reference_attribute.h>
#include <vespa/searchlib/common/i_gid_to_lid_mapper.h>
#include <vespa/searchlib/common/i_gid_to_lid_mapper_factory.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/query/queryterm.h>
+#include <vespa/searchlib/queryeval/fake_result.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/test/mock_gid_to_lid_mapping.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/io/fileutil.h>
@@ -18,10 +23,15 @@ using document::GlobalId;
using generation_t = vespalib::GenerationHandler::generation_t;
using search::AttributeGuard;
using search::AttributeVector;
+using search::QueryTermSimple;
using search::attribute::BasicType;
using search::attribute::Config;
using search::attribute::Reference;
using search::attribute::ReferenceAttribute;
+using search::attribute::SearchContextParams;
+using search::fef::TermFieldMatchData;
+using search::queryeval::FakeResult;
+using search::queryeval::SearchIterator;
using vespalib::ArrayRef;
using vespalib::MemoryUsage;
@@ -74,6 +84,8 @@ struct ReferenceAttributeTest : public ::testing::Test {
resetAttr();
}
+ ~ReferenceAttributeTest() {}
+
AttributeVector &attr() {
return *_attr;
}
@@ -419,4 +431,48 @@ TEST_F(ReferenceAttributeTest, unique_gids_are_tracked)
EXPECT_EQ(0u, getUniqueGids());
}
+struct ReferenceAttributeSearchTest : public ReferenceAttributeTest {
+ // Fixture for searching the reference attribute by document id string; extends ReferenceAttributeTest.
+ constexpr static uint32_t doc_id_limit = 6; // passed to ensureDocIdLimit(); exclusive upper bound of the scan in perform_search()
+ // Setup: docs 1 and 4 are set to the gid of doc1, doc 3 to the gid of doc2; nothing is set for docs 2 and 5.
+ ReferenceAttributeSearchTest()
+ : ReferenceAttributeTest()
+ {
+ ensureDocIdLimit(doc_id_limit);
+ set(1, toGid(doc1));
+ set(3, toGid(doc2));
+ set(4, toGid(doc1));
+ commit(); // commit after all set() calls, before any test body searches
+ }
+ // Seeks every doc id in [1, doc_id_limit) on the iterator and returns the hits as a FakeResult.
+ FakeResult perform_search(SearchIterator& itr) {
+ FakeResult result;
+ itr.initFullRange();
+ for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) {
+ if (itr.seek(doc_id)) {
+ result.doc(doc_id);
+ }
+ }
+ return result;
+ }
+ // Searches the attribute for 'term' (wrapped as a WORD QueryTermSimple) and expects the hits to equal 'expected'.
+ void expect_search_result(const std::string& term, const FakeResult& expected) {
+ auto ctx = _attr->getSearch(std::make_unique<QueryTermSimple>(term, QueryTermSimple::WORD),
+ SearchContextParams());
+ TermFieldMatchData tfmd;
+ auto itr = ctx->createIterator(&tfmd, false); // NOTE(review): 'false' is presumably the 'strict' flag - confirm against SearchContext::createIterator
+ FakeResult actual = perform_search(*itr);
+ EXPECT_EQ(expected, actual);
+ }
+
+};
+
+TEST_F(ReferenceAttributeSearchTest, can_be_searched_by_document_id)
+{ // Each term is a document id string; the expected hits follow from the set() calls in the fixture constructor.
+ expect_search_result(doc1, FakeResult().doc(1).doc(4)); // docs 1 and 4 were set to the gid of doc1
+ expect_search_result(doc2, FakeResult().doc(3)); // doc 3 was set to the gid of doc2
+ expect_search_result(doc3, FakeResult()); // the fixture never sets the gid of doc3, so no hits
+ expect_search_result("invalid document id", FakeResult()); // a term that is not a document id must give an empty result
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
index f91108d066b..82539214ea9 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
@@ -1,21 +1,28 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "reference_attribute.h"
-#include "reference_attribute_saver.h"
#include "attributesaver.h"
#include "readerbase.h"
-#include <vespa/searchlib/common/i_gid_to_lid_mapper_factory.h>
+#include "reference_attribute.h"
+#include "reference_attribute_saver.h"
+#include <vespa/document/base/documentid.h>
+#include <vespa/document/base/idstringexception.h>
#include <vespa/searchlib/common/i_gid_to_lid_mapper.h>
-#include <vespa/vespalib/datastore/unique_store_builder.h>
+#include <vespa/searchlib/common/i_gid_to_lid_mapper_factory.h>
+#include <vespa/searchlib/query/queryterm.h>
+#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/datastore/datastore.hpp>
#include <vespa/vespalib/datastore/unique_store.hpp>
-#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/datastore/unique_store_builder.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.attribute.reference_attribute");
namespace search::attribute {
+using document::DocumentId;
+using document::GlobalId;
+using document::IdParseException;
+
namespace {
// minimum dead bytes in unique store before consider compaction
@@ -265,7 +272,7 @@ ReferenceAttribute::update(DocId doc, const GlobalId &gid)
}
const Reference *
-ReferenceAttribute::getReference(DocId doc)
+ReferenceAttribute::getReference(DocId doc) const
{
assert(doc < _indices.size());
EntryRef ref = _indices[doc];
@@ -411,6 +418,56 @@ ReferenceAttribute::onShrinkLidSpace()
setNumDocs(committedDocIdLimit);
}
+namespace {
+
+class ReferenceSearchContext : public AttributeVector::SearchContext { // Search context matching docs whose reference gid equals a fixed term gid.
+private:
+ const ReferenceAttribute& _ref_attr; // attribute being searched; held by reference
+ GlobalId _term; // gid to match; a default-constructed GlobalId makes valid() return false
+
+public:
+ ReferenceSearchContext(const ReferenceAttribute& ref_attr, const GlobalId& term)
+ : AttributeVector::SearchContext(ref_attr),
+ _ref_attr(ref_attr),
+ _term(term)
+ {
+ }
+ bool valid() const override { // true unless the term equals a default-constructed GlobalId
+ return _term != GlobalId();
+ }
+ int32_t onFind(DocId docId, int32_t elementId, int32_t& weight) const override { // returns 0 on match, -1 otherwise
+ if (elementId != 0) { // only element id 0 can match
+ return -1;
+ }
+ auto* ref = _ref_attr.getReference(docId);
+ if (ref == nullptr) { // no reference returned for this doc, so no match
+ return -1;
+ }
+ weight = 1; // written whenever a reference exists, including when the gid comparison below fails
+ return (_term == ref->gid()) ? 0 : -1;
+ }
+ int32_t onFind(DocId docId, int32_t elementId) const override { // same match result; the weight is discarded
+ int32_t weight; // throwaway out-parameter; never read here
+ return onFind(docId, elementId, weight);
+ }
+};
+
+}
+
+AttributeVector::SearchContext::UP
+ReferenceAttribute::getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams& params) const
+{ // Parses the query term as a document id and returns a ReferenceSearchContext for its global id.
+ (void) params; // params is not used by this implementation
+ GlobalId gid; // stays default-constructed if parsing below throws IdParseException
+ try {
+ DocumentId docId(term->getTerm());
+ gid = docId.getGlobalId();
+ } catch (const IdParseException&) {
+ // The query term is not valid, which will result in an empty search iterator.
+ }
+ return std::make_unique<ReferenceSearchContext>(*this, gid);
+}
+
IMPLEMENT_IDENTIFIABLE_ABSTRACT(ReferenceAttribute, AttributeVector);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
index 87d5a5c27bb..87d624eb21f 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
@@ -3,8 +3,8 @@
#pragma once
#include "not_implemented_attribute.h"
-#include "reference_mappings.h"
#include "reference.h"
+#include "reference_mappings.h"
#include <vespa/vespalib/datastore/unique_store.h>
#include <vespa/vespalib/util/rcuvector.h>
@@ -71,7 +71,7 @@ public:
bool addDoc(DocId &doc) override;
uint32_t clearDoc(DocId doc) override;
void update(DocId doc, const GlobalId &gid);
- const Reference *getReference(DocId doc);
+ const Reference *getReference(DocId doc) const;
void setGidToLidMapperFactory(std::shared_ptr<IGidToLidMapperFactory> gidToLidMapperFactory);
std::shared_ptr<IGidToLidMapperFactory> getGidToLidMapperFactory() const { return _gidToLidMapperFactory; }
TargetLids getTargetLids() const { return _referenceMappings.getTargetLids(); }
@@ -91,6 +91,8 @@ public:
foreach_lid(uint32_t targetLid, FunctionType &&func) const {
_referenceMappings.foreach_lid(targetLid, std::forward<FunctionType>(func));
}
+
+ SearchContext::UP getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams& params) const override;
};
}