summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@yahoo-inc.com> 2017-02-02 14:11:31 +0000
committer Tor Egge <Tor.Egge@yahoo-inc.com> 2017-02-02 14:50:50 +0000
commit 9d79427593beec8a7bfb1609be7379c7924853d3 (patch)
tree f8ecb52a0034eb18686cd756beebc8e2c34970f9
parent c42033757432562d303f59b76be3ed43efd26939 (diff)
Add reference attribute.
-rw-r--r-- searchcommon/src/vespa/searchcommon/attribute/basictype.cpp 3
-rw-r--r-- searchcommon/src/vespa/searchcommon/attribute/basictype.h 1
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/attribute/reference_attribute/CMakeLists.txt 8
-rw-r--r-- searchlib/src/tests/attribute/reference_attribute/FILES 1
-rw-r--r-- searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp 158
-rw-r--r-- searchlib/src/tests/datastore/unique_store/unique_store_test.cpp 46
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp 252
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute.h 51
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp 80
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.h 41
-rw-r--r-- searchlib/src/vespa/searchlib/datastore/unique_store.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/datastore/unique_store.h 18
-rw-r--r-- searchlib/src/vespa/searchlib/datastore/unique_store.hpp 49
-rw-r--r-- searchlib/src/vespa/searchlib/datastore/unique_store_builder.h 47
-rw-r--r-- searchlib/src/vespa/searchlib/datastore/unique_store_builder.hpp 60
-rw-r--r-- searchlib/src/vespa/searchlib/datastore/unique_store_saver.h 53
-rw-r--r-- searchlib/src/vespa/searchlib/datastore/unique_store_saver.hpp 49
19 files changed, 917 insertions, 5 deletions
diff --git a/searchcommon/src/vespa/searchcommon/attribute/basictype.cpp b/searchcommon/src/vespa/searchcommon/attribute/basictype.cpp
index b1a4539ebb8..74b3e331671 100644
--- a/searchcommon/src/vespa/searchcommon/attribute/basictype.cpp
+++ b/searchcommon/src/vespa/searchcommon/attribute/basictype.cpp
@@ -19,7 +19,8 @@ const BasicType::TypeInfo BasicType::_typeTable[BasicType::MAX_TYPE] = {
{ BasicType::FLOAT, sizeof(float), "float" },
{ BasicType::DOUBLE, sizeof(double), "double" },
{ BasicType::PREDICATE, 0, "predicate" },
- { BasicType::TENSOR, 0, "tensor" }
+ { BasicType::TENSOR, 0, "tensor" },
+ { BasicType::REFERENCE, 12, "reference" }
};
BasicType::Type
diff --git a/searchcommon/src/vespa/searchcommon/attribute/basictype.h b/searchcommon/src/vespa/searchcommon/attribute/basictype.h
index 26b17c46f60..425459b41b8 100644
--- a/searchcommon/src/vespa/searchcommon/attribute/basictype.h
+++ b/searchcommon/src/vespa/searchcommon/attribute/basictype.h
@@ -24,6 +24,7 @@ class BasicType
DOUBLE = 10,
PREDICATE = 11,
TENSOR = 12,
+ REFERENCE = 13,
MAX_TYPE
};
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 57beefe47a2..9daf335306a 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -85,6 +85,7 @@ vespa_define_module(
src/tests/attribute/multi_value_mapping
src/tests/attribute/postinglist
src/tests/attribute/postinglistattribute
+ src/tests/attribute/reference_attribute
src/tests/attribute/searchable
src/tests/attribute/searchcontext
src/tests/attribute/sourceselector
diff --git a/searchlib/src/tests/attribute/reference_attribute/CMakeLists.txt b/searchlib/src/tests/attribute/reference_attribute/CMakeLists.txt
new file mode 100644
index 00000000000..1ee25da88a5
--- /dev/null
+++ b/searchlib/src/tests/attribute/reference_attribute/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from reference_attribute_test.cpp, depending on searchlib.
+vespa_add_executable(searchlib_reference_attribute_test_app TEST
+ SOURCES
+ reference_attribute_test.cpp
+ DEPENDS
+ searchlib
+)
+# Register the executable as a test that runs the binary without arguments.
+vespa_add_test(NAME searchlib_reference_attribute_test_app COMMAND searchlib_reference_attribute_test_app)
diff --git a/searchlib/src/tests/attribute/reference_attribute/FILES b/searchlib/src/tests/attribute/reference_attribute/FILES
new file mode 100644
index 00000000000..97203f7b4b0
--- /dev/null
+++ b/searchlib/src/tests/attribute/reference_attribute/FILES
@@ -0,0 +1 @@
+reference_attribute_test.cpp
diff --git a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
new file mode 100644
index 00000000000..fbf519ccca1
--- /dev/null
+++ b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
@@ -0,0 +1,158 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("reference_attribute_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/test/insertion_operators.h>
+#include <vespa/vespalib/util/traits.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/searchlib/attribute/reference_attribute.h>
+#include <vespa/document/base/documentid.h>
+
+using search::MemoryUsage;
+using vespalib::ArrayRef;
+using generation_t = vespalib::GenerationHandler::generation_t;
+using search::attribute::ReferenceAttribute;
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::AttributeVector;
+using document::GlobalId;
+using document::DocumentId;
+
+namespace {
+
+// Builds a DocumentId from the given string and returns its global id.
+GlobalId toGid(vespalib::stringref docId) {
+    DocumentId parsedId(docId);
+    return parsedId.getGlobalId();
+}
+
+vespalib::string doc1("id:test:music::1");
+vespalib::string doc2("id:test:music::2");
+
+}
+
+
+// Test fixture owning a ReferenceAttribute named "test" together with small
+// helpers for populating, committing, saving, loading and checking it.
+struct Fixture
+{
+    std::unique_ptr<ReferenceAttribute> _attr;
+
+    Fixture()
+        : _attr()
+    {
+        resetAttr();
+    }
+
+    // Access the attribute through its AttributeVector interface.
+    AttributeVector &attr() {
+        return *_attr;
+    }
+
+    // Drop the current attribute (if any) and create a fresh one.
+    void resetAttr() {
+        _attr.reset();
+        _attr = std::make_unique<ReferenceAttribute>("test",
+                                                     Config(BasicType::REFERENCE));
+    }
+
+    // Add documents, committing after each one, until docId is below the
+    // number of documents in the attribute.
+    void ensureSpace(uint32_t docId) {
+        while (attr().getNumDocs() <= docId) {
+            uint32_t newDocId = 0u;
+            _attr->addDoc(newDocId);
+            _attr->commit();
+        }
+    }
+
+    // Commit with commit(true) and return the attribute status afterwards.
+    search::attribute::Status getStatus() {
+        attr().commit(true);
+        return attr().getStatus();
+    }
+
+    // Returns the global id referenced by doc, or nullptr when there is none.
+    const GlobalId *get(uint32_t doc) {
+        return _attr->getReference(doc);
+    }
+
+    void set(uint32_t doc, const GlobalId &gid) {
+        _attr->update(doc, gid);
+    }
+
+    void clear(uint32_t doc) {
+        _attr->clearDoc(doc);
+    }
+
+    void commit() { attr().commit(); }
+
+    // Expect that doc has no reference.
+    void assertNoRef(uint32_t doc)
+    {
+        EXPECT_TRUE(get(doc) == nullptr);
+    }
+
+    // Expect that doc references the global id of the document id in str.
+    void assertRef(vespalib::stringref str, uint32_t doc) {
+        const GlobalId *gid = get(doc);
+        EXPECT_TRUE(gid != nullptr);
+        // Only dereference when a reference is present. A missing reference
+        // has already been reported by the check above, and dereferencing
+        // nullptr would crash the test binary instead of failing the test.
+        if (gid != nullptr) {
+            EXPECT_EQUAL(toGid(str), *gid);
+        }
+    }
+
+    void save() {
+        attr().save();
+    }
+
+    // Replace the attribute with a fresh instance and load it.
+    void load() {
+        resetAttr();
+        attr().load();
+    }
+};
+
+// Basic set/get: documents 1 and 2 get references, document 3 stays unset.
+TEST_F("require that we can instantiate reference attribute", Fixture)
+{
+ f.ensureSpace(4);
+ f.set(1, toGid(doc1));
+ f.set(2, toGid(doc2));
+ f.commit();
+
+ TEST_DO(f.assertNoRef(3));
+ TEST_DO(f.assertRef(doc1, 1));
+ TEST_DO(f.assertRef(doc2, 2));
+}
+
+
+// Repeatedly clears and re-sets document 2 until the reported used memory
+// drops, which this test takes as the sign that a compaction has happened.
+TEST_F("require that we can compact attribute", Fixture)
+{
+ f.ensureSpace(4);
+ f.set(1, toGid(doc1));
+ f.set(2, toGid(doc2));
+ f.commit();
+ search::attribute::Status oldStatus = f.getStatus();
+ search::attribute::Status newStatus = oldStatus;
+ uint64_t iter = 0;
+ uint64_t iterLimit = 100000;
+ for (; iter < iterLimit; ++iter) {
+ f.clear(2);
+ f.set(2, toGid(doc2));
+ // getStatus() commits before reading the status.
+ newStatus = f.getStatus();
+ if (newStatus.getUsed() < oldStatus.getUsed()) {
+ break;
+ }
+ oldStatus = newStatus;
+ }
+ // The loop must have ended through the break, not by hitting the limit.
+ EXPECT_GREATER(iterLimit, iter);
+ LOG(info,
+ "iter = %" PRIu64 ", memory usage %" PRIu64 ", -> %" PRIu64,
+ iter, oldStatus.getUsed(), newStatus.getUsed());
+ // The stored references must be unchanged after the memory drop.
+ TEST_DO(f.assertNoRef(3));
+ TEST_DO(f.assertRef(doc1, 1));
+ TEST_DO(f.assertRef(doc2, 2));
+}
+
+// Round trip: save the attribute, load it into a fresh instance and check
+// that the same references are visible.
+TEST_F("require that we can save and load attribute", Fixture)
+{
+ f.ensureSpace(4);
+ f.set(1, toGid(doc1));
+ f.set(2, toGid(doc2));
+ f.commit();
+ f.save();
+ f.load();
+ TEST_DO(f.assertNoRef(3));
+ TEST_DO(f.assertRef(doc1, 1));
+ TEST_DO(f.assertRef(doc2, 2));
+ // Clean up; presumably these are the files written by save() — both must exist.
+ EXPECT_TRUE(vespalib::unlink("test.dat"));
+ EXPECT_TRUE(vespalib::unlink("test.udat"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/datastore/unique_store/unique_store_test.cpp b/searchlib/src/tests/datastore/unique_store/unique_store_test.cpp
index aefbf9a9835..5bbb463f152 100644
--- a/searchlib/src/tests/datastore/unique_store/unique_store_test.cpp
+++ b/searchlib/src/tests/datastore/unique_store/unique_store_test.cpp
@@ -111,6 +111,8 @@ struct Fixture
refStore = compactedRefStore;
}
size_t entrySize() const { return sizeof(EntryT); }
+ auto getBuilder(uint32_t uniqueValuesHint) { return store.getBuilder(uniqueValuesHint); }
+ auto getSaver() { return store.getSaver(); }
};
using NumberFixture = Fixture<uint32_t>;
@@ -205,4 +207,48 @@ TEST_F("require that compaction works", NumberFixture)
TEST_DO(f.assertStoreContent());
}
+// Adds two values through the builder, maps enum values 1 and 2 back to
+// entry refs, and checks that the store later hands out the same refs.
+TEST_F("require that builder works", NumberFixture)
+{
+ auto builder = f.getBuilder(2);
+ builder.add(10);
+ builder.add(20);
+ builder.setupRefCounts();
+ // Enum values 1 and 2 are the first and second added values.
+ EntryRef val10Ref = builder.mapEnumValueToEntryRef(1);
+ EntryRef val20Ref = builder.mapEnumValueToEntryRef(2);
+ TEST_DO(f.assertBufferState(val10Ref, MemStats().used(3).dead(1))); // Note: First element is reserved
+ EXPECT_TRUE(val10Ref.valid());
+ EXPECT_TRUE(val20Ref.valid());
+ EXPECT_NOT_EQUAL(val10Ref.ref(), val20Ref.ref());
+ f.assertGet(val10Ref, 10);
+ f.assertGet(val20Ref, 20);
+ builder.makeDictionary();
+ // Once the dictionary is built, adding an existing value must yield the builder's ref.
+ EntryRef ref = f.add(10);
+ EXPECT_EQUAL(val10Ref.ref(), ref.ref());
+ ref = f.add(20);
+ EXPECT_EQUAL(val20Ref.ref(), ref.ref());
+}
+
+// Checks that the saver visits exactly the live values and enumerates them
+// from 1 in visiting order, with 0 for the invalid ref.
+TEST_F("require that saver works", NumberFixture)
+{
+ EntryRef val10Ref = f.add(10);
+ EntryRef val20Ref = f.add(20);
+ // 40 is added and removed again, so the saver is expected not to visit it.
+ f.remove(f.add(40));
+ f.trimHoldLists();
+
+ auto saver = f.getSaver();
+ std::vector<uint32_t> refs;
+ saver.foreach_key([&](EntryRef ref) { refs.push_back(ref.ref()); });
+ std::vector<uint32_t> expRefs;
+ expRefs.push_back(val10Ref.ref());
+ expRefs.push_back(val20Ref.ref());
+ EXPECT_EQUAL(expRefs, refs);
+ saver.enumerateValues();
+ uint32_t invalidEnum = saver.mapEntryRefToEnumValue(EntryRef());
+ uint32_t enumValue10 = saver.mapEntryRefToEnumValue(val10Ref);
+ uint32_t enumValue20 = saver.mapEntryRefToEnumValue(val20Ref);
+ EXPECT_EQUAL(0u, invalidEnum);
+ EXPECT_EQUAL(1u, enumValue10);
+ EXPECT_EQUAL(2u, enumValue20);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 09f42fa6fe5..1f710f1a544 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -72,6 +72,8 @@ vespa_add_library(searchlib_attribute OBJECT
postingstore.cpp
predicate_attribute.cpp
readerbase.cpp
+ reference_attribute.cpp
+ reference_attribute_saver.cpp
singleenumattribute.cpp
singleenumattributesaver.cpp
singlenumericattribute.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
new file mode 100644
index 00000000000..4015493d25d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
@@ -0,0 +1,252 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "reference_attribute.h"
+#include "attributesaver.h"
+#include <vespa/vespalib/data/fileheader.h>
+#include "readerbase.h"
+#include <vespa/searchlib/datastore/unique_store_builder.h>
+#include <vespa/searchlib/datastore/datastore.hpp>
+#include "reference_attribute_saver.h"
+
+namespace search {
+namespace attribute {
+
+namespace {
+
+// Minimum number of dead bytes in the unique store before compaction is considered.
+constexpr size_t DEAD_BYTES_SLACK = 0x10000u;
+
+const vespalib::string uniqueValueCountTag = "uniqueValueCount";
+
+// Returns the integer value of the "uniqueValueCount" header tag, or 0 when
+// the header has no such tag.
+uint64_t
+extractUniqueValueCount(const vespalib::GenericHeader &header)
+{
+    if (!header.hasTag(uniqueValueCountTag)) {
+        return 0u;
+    }
+    return header.getTag(uniqueValueCountTag).asInteger();
+}
+
+}
+
+// Creates an empty reference attribute. The index vector is constructed with
+// the attribute's generation holder, and the attribute is set up as an
+// enumerated attribute with enumerated save enabled.
+ReferenceAttribute::ReferenceAttribute(const vespalib::stringref baseFileName,
+ const Config & cfg)
+ : NotImplementedAttribute(baseFileName, cfg),
+ _store(),
+ _indices(getGenerationHolder())
+{
+ setEnum(true);
+ enableEnumeratedSave(true);
+}
+
+// Nothing to release beyond what the members clean up themselves.
+ReferenceAttribute::~ReferenceAttribute() = default;
+
+// Returns true when doc is below the current capacity of the index vector
+// (after asking it to reserve room for doc + 1 entries), false otherwise.
+bool
+ReferenceAttribute::onAddDoc(DocId doc) {
+    if (doc >= _indices.capacity()) {
+        return false;
+    }
+    _indices.reserve(doc + 1);
+    return true;
+}
+
+// Reserves room in the index vector for limit entries.
+void
+ReferenceAttribute::onAddDocs(DocId limit) {
+ _indices.reserve(limit);
+}
+
+// Appends a new document without a reference and reports its id through doc.
+// Always returns true.
+bool
+ReferenceAttribute::addDoc(DocId &doc)
+{
+ // Sampled before the push_back below.
+ bool incGen = _indices.isFull();
+ doc = _indices.size();
+ _indices.push_back(EntryRef());
+ incNumDocs();
+ updateUncommittedDocIdLimit(doc);
+ incGen |= onAddDoc(doc);
+ // Either bump the generation or clean up old generations right away.
+ if (incGen) {
+ incGeneration();
+ } else {
+ removeAllOldGenerations();
+ }
+ return true;
+}
+
+// Removes the reference held by doc, if any.
+// Returns 1 when a reference was removed and 0 when doc had none.
+uint32_t
+ReferenceAttribute::clearDoc(DocId doc)
+{
+    updateUncommittedDocIdLimit(doc);
+    assert(doc < _indices.size());
+    EntryRef previousRef = _indices[doc];
+    if (!previousRef.valid()) {
+        return 0u;
+    }
+    // Unlink the document first, then release the value in the store.
+    _indices[doc] = EntryRef();
+    _store.remove(previousRef);
+    return 1u;
+}
+
+// Trims the hold lists of the unique store and of the generation holder,
+// passing firstUsed on to both.
+void
+ReferenceAttribute::removeOldGenerations(generation_t firstUsed)
+{
+ _store.trimHoldLists(firstUsed);
+ getGenerationHolder().trimHoldLists(firstUsed);
+}
+
+// Freezes the unique store, then transfers the hold lists of the store and
+// of the generation holder to the generation preceding the given one.
+void
+ReferenceAttribute::onGenerationChange(generation_t generation)
+{
+    const generation_t previousGeneration = generation - 1;
+    _store.freeze();
+    _store.transferHoldLists(previousGeneration);
+    getGenerationHolder().transferHoldLists(previousGeneration);
+}
+
+// Bumps the generation on every commit. If compaction was performed, bumps
+// it once more and forces a statistics update.
+void
+ReferenceAttribute::onCommit()
+{
+ // Note: Cost can be reduced if unneeded generation increments are dropped
+ incGeneration();
+ if (considerCompact(getConfig().getCompactionStrategy())) {
+ incGeneration();
+ updateStat(true);
+ }
+}
+
+// Refreshes the cached unique store memory usage and reports statistics
+// covering the unique store plus the index vector.
+void
+ReferenceAttribute::onUpdateStat()
+{
+    // The cache holds the unique store usage only; considerCompact() reads it.
+    _cachedUniqueStoreMemoryUsage = _store.getMemoryUsage();
+    MemoryUsage combined = _cachedUniqueStoreMemoryUsage;
+    combined.merge(_indices.getMemoryUsage());
+    updateStatistics(getTotalValueCount(), getUniqueValueCount(),
+                     combined.allocatedBytes(),
+                     combined.usedBytes(), combined.deadBytes(), combined.allocatedBytesOnHold());
+}
+
+// Creates a saver holding a generation guard, a copy of the indices up to
+// the committed doc id limit, and a reference to the unique store.
+std::unique_ptr<AttributeSaver>
+ReferenceAttribute::onInitSave()
+{
+ // The guard is taken before the indices are copied.
+ vespalib::GenerationHandler::Guard guard(this->getGenerationHandler().
+ takeGuard());
+ return std::make_unique<ReferenceAttributeSaver>
+ (std::move(guard),
+ createSaveTargetConfig(),
+ getIndicesCopy(getCommittedDocIdLimit()),
+ _store);
+}
+
+// Loads the attribute from its enumerated saved form: the unique GlobalId
+// values come from the udat buffer and one enum value per document is read
+// through the reader. Returns false when the reader has no load data.
+bool
+ReferenceAttribute::onLoad()
+{
+ ReaderBase attrReader(*this);
+ bool ok(attrReader.getHasLoadData());
+
+ if (!ok) {
+ return false;
+ }
+
+ setCreateSerialNum(attrReader.getCreateSerialNum());
+
+ // Only the enumerated format without an idx file is handled here.
+ assert(attrReader.getEnumerated());
+ assert(!attrReader.hasIdx());
+
+ size_t numDocs(0);
+ uint64_t numValues(0);
+
+ // One enum value per document, so the document count is the enum count.
+ numValues = attrReader.getEnumCount();
+ numDocs = numValues;
+
+ fileutil::LoadedBuffer::UP udatBuffer(loadUDAT());
+
+ const GenericHeader &header = udatBuffer->getHeader();
+ // NOTE(review): the 64-bit tag value is narrowed to 32 bits here, and the
+ // size check below is assert-only — confirm input is validated elsewhere.
+ uint32_t uniqueValueCount = extractUniqueValueCount(header);
+ assert(uniqueValueCount * sizeof(GlobalId) == udatBuffer->size());
+
+ vespalib::ConstArrayRef<GlobalId> uniques(static_cast<const GlobalId *>(udatBuffer->buffer()), uniqueValueCount);
+
+ auto builder = _store.getBuilder(uniqueValueCount);
+
+ for (const auto &value : uniques) {
+ builder.add(value);
+ }
+ // Must run after all values are added and before enum values are mapped.
+ builder.setupRefCounts();
+
+ _indices.clear();
+ _indices.unsafe_reserve(numDocs);
+
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t enumValue = attrReader.getNextEnum();
+ _indices.push_back(builder.mapEnumValueToEntryRef(enumValue));
+ }
+ // Builds the dictionary from the reference counts gathered in the loop above.
+ builder.makeDictionary();
+ incGeneration();
+
+ return true;
+}
+
+// Makes doc reference gid, releasing the previously referenced value if any.
+void
+ReferenceAttribute::update(DocId doc, const GlobalId &gid)
+{
+ updateUncommittedDocIdLimit(doc);
+ assert(doc < _indices.size());
+ EntryRef oldRef = _indices[doc];
+ // Add the new value before dropping the old one.
+ EntryRef newRef = _store.add(gid);
+ // Release fence between adding the value and publishing its ref;
+ // presumably for the benefit of concurrent readers — confirm.
+ std::atomic_thread_fence(std::memory_order_release);
+ _indices[doc] = newRef;
+ if (oldRef.valid()) {
+ _store.remove(oldRef);
+ }
+}
+
+// Returns a pointer to the global id referenced by doc, or nullptr when the
+// document has no reference.
+const ReferenceAttribute::GlobalId *
+ReferenceAttribute::getReference(DocId doc)
+{
+    assert(doc < _indices.size());
+    EntryRef ref = _indices[doc];
+    return ref.valid() ? &_store.get(ref) : nullptr;
+}
+
+// Compacts the worst buffer of the unique store when the cached memory usage
+// shows at least DEAD_BYTES_SLACK dead bytes and the dead bytes exceed the
+// configured ratio of used bytes. Returns true when compaction was started.
+bool
+ReferenceAttribute::considerCompact(const CompactionStrategy &compactionStrategy)
+{
+    const size_t usedBytes = _cachedUniqueStoreMemoryUsage.usedBytes();
+    const size_t deadBytes = _cachedUniqueStoreMemoryUsage.deadBytes();
+    if (deadBytes < DEAD_BYTES_SLACK) {
+        return false;
+    }
+    // Written as a negated '<' so the outcome matches the original comparison exactly.
+    if (!(usedBytes * compactionStrategy.getMaxDeadBytesRatio() < deadBytes)) {
+        return false;
+    }
+    compactWorst();
+    return true;
+}
+
+// Asks the unique store to compact its worst buffer and, when a compaction
+// context is returned, lets it process every entry of the index vector.
+void
+ReferenceAttribute::compactWorst()
+{
+ datastore::ICompactionContext::UP compactionContext(_store.compactWorst());
+ if (compactionContext) {
+ // NOTE(review): &_indices[0] assumes a non-empty index vector — confirm.
+ compactionContext->compact(vespalib::ArrayRef<EntryRef>(&_indices[0],
+ _indices.size()));
+ }
+}
+
+// Returns the number of unique values as reported by the unique store.
+uint64_t
+ReferenceAttribute::getUniqueValueCount() const
+{
+ return _store.getNumUniques();
+}
+
+// Returns a copy of the first size entries of the index vector.
+ReferenceAttribute::IndicesCopyVector
+ReferenceAttribute::getIndicesCopy(uint32_t size) const
+{
+ assert(size <= _indices.size());
+ // NOTE(review): &_indices[0] is evaluated even when the vector is empty — confirm this is safe.
+ return IndicesCopyVector(&_indices[0], &_indices[0] + size);
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
new file mode 100644
index 00000000000..6bb24b34e01
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
@@ -0,0 +1,51 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "not_implemented_attribute.h"
+#include <vespa/document/base/globalid.h>
+#include <vespa/searchlib/datastore/unique_store.h>
+#include <vespa/searchlib/common/rcuvector.h>
+
+namespace search {
+namespace attribute {
+
+/*
+ * Attribute vector mapping from local document ids to global ids
+ * referencing external documents.
+ */
+class ReferenceAttribute : public NotImplementedAttribute
+{
+ using EntryRef = search::datastore::EntryRef;
+ using GlobalId = document::GlobalId;
+ // Store holding the unique global ids.
+ datastore::UniqueStore<GlobalId> _store;
+ // One entry per local document id; an invalid EntryRef means no reference.
+ RcuVectorBase<EntryRef> _indices;
+ // Memory usage of _store as sampled by onUpdateStat(); read by considerCompact().
+ MemoryUsage _cachedUniqueStoreMemoryUsage;
+ using IndicesCopyVector = vespalib::Array<EntryRef>;
+
+ virtual void onAddDocs(DocId docIdLimit) override;
+ // NOTE(review): unlike its neighbours this is not marked override — confirm
+ // whether the base class declares it.
+ virtual bool onAddDoc(DocId doc);
+ virtual void removeOldGenerations(generation_t firstUsed) override;
+ virtual void onGenerationChange(generation_t generation) override;
+ virtual void onCommit() override;
+ virtual void onUpdateStat() override;
+ virtual std::unique_ptr<AttributeSaver> onInitSave() override;
+ virtual bool onLoad() override;
+ virtual uint64_t getUniqueValueCount() const override;
+
+ // Compacts the unique store when the cached memory usage calls for it;
+ // returns true when compaction was started.
+ bool considerCompact(const CompactionStrategy &compactionStrategy);
+ void compactWorst();
+ // Copy of the first size entries of _indices.
+ IndicesCopyVector getIndicesCopy(uint32_t size) const;
+
+public:
+ ReferenceAttribute(const vespalib::stringref baseFileName,
+ const Config & cfg);
+ virtual ~ReferenceAttribute();
+ // Appends a document without a reference; its id is returned through doc.
+ virtual bool addDoc(DocId &doc) override;
+ // Removes the reference of doc; returns 1 if one was removed, otherwise 0.
+ virtual uint32_t clearDoc(DocId doc) override;
+ // Makes doc reference gid.
+ void update(DocId doc, const GlobalId &gid);
+ // Returns the global id referenced by doc, or nullptr when there is none.
+ const GlobalId *getReference(DocId doc);
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp
new file mode 100644
index 00000000000..6dd2db7d754
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.cpp
@@ -0,0 +1,80 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "reference_attribute_saver.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/vespalib/util/array.hpp>
+
+
+using vespalib::GenerationHandler;
+using document::GlobalId;
+using search::datastore::EntryRef;
+
+namespace search {
+namespace attribute {
+
+// Takes ownership of the generation guard and the indices copy, keeps a
+// reference to the store, and obtains a saver from that store.
+ReferenceAttributeSaver::
+ReferenceAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ IndicesCopyVector &&indices,
+ const Store &store)
+ : AttributeSaver(std::move(guard), cfg),
+ _indices(std::move(indices)),
+ _store(store),
+ _saver(store.getSaver())
+{
+}
+
+
+// Members release their own resources.
+ReferenceAttributeSaver::~ReferenceAttributeSaver() = default;
+
+namespace {
+
+// Writes the GlobalId of every key visited by the saver to the udat writer
+// of the save target, in the saver's visiting order, and flushes the writer.
+template <class Store, class Saver>
+void
+writeUdat(IAttributeSaveTarget &saveTarget, const Store &store, const Saver &saver)
+{
+    std::unique_ptr<BufferWriter>
+        udatWriter(saveTarget.udatWriter().allocBufferWriter());
+    BufferWriter &writer = *udatWriter;
+    saver.foreach_key([&store, &writer](EntryRef ref) {
+                          const GlobalId &gid = store.get(ref);
+                          writer.write(&gid, sizeof(GlobalId));
+                      });
+    udatWriter->flush();
+}
+
+}
+
+// Writes the unique values to the udat writer, then one 32-bit enum value
+// per copied index entry to the dat writer. Always returns true.
+bool
+ReferenceAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
+{
+ writeUdat(saveTarget, _store, _saver);
+ std::unique_ptr<search::BufferWriter> datWriter(saveTarget.datWriter().
+ allocBufferWriter());
+
+ // Enum values must be assigned before entry refs can be mapped below.
+ _saver.enumerateValues();
+ for (const auto &ref : _indices) {
+ uint32_t enumValue = _saver.mapEntryRefToEnumValue(ref);
+ datWriter->write(&enumValue, sizeof(uint32_t));
+ }
+ datWriter->flush();
+ return true;
+}
+
+} // namespace search::attribute
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.h
new file mode 100644
index 00000000000..b876eff9b86
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute_saver.h
@@ -0,0 +1,41 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributesaver.h"
+#include <vespa/document/base/globalid.h>
+#include <vespa/searchlib/datastore/unique_store.h>
+#include <vespa/searchlib/datastore/unique_store_saver.h>
+#include <vespa/searchlib/common/rcuvector.h>
+#include "iattributesavetarget.h"
+
+namespace search {
+namespace attribute {
+
+/*
+ * Class for saving a reference attribute.
+ */
+class ReferenceAttributeSaver : public AttributeSaver
+{
+private:
+ using EntryRef = search::datastore::EntryRef;
+ using GlobalId = document::GlobalId;
+ using IndicesCopyVector = vespalib::Array<EntryRef>;
+ using Store = datastore::UniqueStore<GlobalId, datastore::EntryRefT<22>>;
+ using Saver = datastore::UniqueStoreSaver<GlobalId, datastore::EntryRefT<22>>;
+ // Copy of the per-document entry refs, owned by the saver.
+ IndicesCopyVector _indices;
+ // Store the entry refs point into; not owned.
+ const Store &_store;
+ // Saver obtained from _store in the constructor.
+ Saver _saver;
+
+ virtual bool onSave(IAttributeSaveTarget &saveTarget) override;
+public:
+ ReferenceAttributeSaver(vespalib::GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ IndicesCopyVector &&indices,
+ const Store &store);
+
+ virtual ~ReferenceAttributeSaver();
+};
+
+} // namespace search::attribute
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store.cpp b/searchlib/src/vespa/searchlib/datastore/unique_store.cpp
index bcbe1be0360..17eb563947d 100644
--- a/searchlib/src/vespa/searchlib/datastore/unique_store.cpp
+++ b/searchlib/src/vespa/searchlib/datastore/unique_store.cpp
@@ -7,6 +7,8 @@ namespace search {
namespace datastore {
template class UniqueStore<document::GlobalId, EntryRefT<22>>;
+template class UniqueStoreBuilder<document::GlobalId, EntryRefT<22>>;
+template class UniqueStoreSaver<document::GlobalId, EntryRefT<22>>;
} // namespace datastore
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store.h b/searchlib/src/vespa/searchlib/datastore/unique_store.h
index 60381f38889..ba4a1799eba 100644
--- a/searchlib/src/vespa/searchlib/datastore/unique_store.h
+++ b/searchlib/src/vespa/searchlib/datastore/unique_store.h
@@ -1,4 +1,4 @@
-// Copyright 2017 Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
@@ -13,6 +13,12 @@
namespace search {
namespace datastore {
+template <typename EntryT, typename RefT>
+class UniqueStoreBuilder;
+
+template <typename EntryT, typename RefT>
+class UniqueStoreSaver;
+
/**
* Datastore for unique values of type EntryT that is accessed via a
* 32-bit EntryRef.
@@ -81,11 +87,15 @@ public:
MemoryUsage getMemoryUsage() const;
// Pass on hold list management to underlying store
- void transferHoldLists(generation_t generation) { _dict.getAllocator().transferHoldLists(generation); _store.transferHoldLists(generation); }
- void trimHoldLists(generation_t firstUsed) { _dict.getAllocator().trimHoldLists(firstUsed); _store.trimHoldLists(firstUsed); }
+ void transferHoldLists(generation_t generation);
+ void trimHoldLists(generation_t firstUsed);
vespalib::GenerationHolder &getGenerationHolder(void) { return _store.getGenerationHolder(); }
void setInitializing(bool initializing) { _store.setInitializing(initializing); }
- void freeze() { _dict.getAllocator().freeze(); }
+ void freeze();
+ uint32_t getNumUniques() const;
+
+ UniqueStoreBuilder<EntryType, RefType> getBuilder(uint32_t uniqueValuesHint);
+ UniqueStoreSaver<EntryType, RefType> getSaver() const;
// Should only be used for unit testing
const BufferState &bufferState(EntryRef ref) const;
diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store.hpp b/searchlib/src/vespa/searchlib/datastore/unique_store.hpp
index 85ff503625f..9cc9c1db83d 100644
--- a/searchlib/src/vespa/searchlib/datastore/unique_store.hpp
+++ b/searchlib/src/vespa/searchlib/datastore/unique_store.hpp
@@ -5,10 +5,14 @@
#include "unique_store.h"
#include "datastore.hpp"
#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/btree/btreebuilder.hpp>
#include <vespa/searchlib/btree/btreeroot.hpp>
#include <vespa/searchlib/btree/btreenodeallocator.hpp>
#include <vespa/searchlib/btree/btreeiterator.hpp>
#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include "unique_store_builder.hpp"
+#include "unique_store_saver.hpp"
#include <atomic>
namespace search {
@@ -187,5 +191,50 @@ UniqueStore<EntryT, RefT>::bufferState(EntryRef ref) const
return _store.getBufferState(internalRef.bufferId());
}
+
+// Passes the generation on to the dictionary allocator and to the data store.
+template <typename EntryT, typename RefT>
+void
+UniqueStore<EntryT, RefT>::transferHoldLists(generation_t generation)
+{
+ _dict.getAllocator().transferHoldLists(generation);
+ _store.transferHoldLists(generation);
+}
+
+// Trims the hold lists of the dictionary allocator and of the data store.
+template <typename EntryT, typename RefT>
+void
+UniqueStore<EntryT, RefT>::trimHoldLists(generation_t firstUsed)
+{
+ _dict.getAllocator().trimHoldLists(firstUsed);
+ _store.trimHoldLists(firstUsed);
+}
+
+// Freezes the dictionary allocator.
+template <typename EntryT, typename RefT>
+void
+UniqueStore<EntryT, RefT>::freeze()
+{
+ _dict.getAllocator().freeze();
+}
+
+// Returns a builder operating on this store's data store and dictionary.
+// uniqueValuesHint is passed on to the builder.
+template <typename EntryT, typename RefT>
+UniqueStoreBuilder<EntryT, RefT>
+UniqueStore<EntryT, RefT>::getBuilder(uint32_t uniqueValuesHint)
+{
+ return UniqueStoreBuilder<EntryType, RefType>(_store, _typeId, _dict, uniqueValuesHint);
+}
+
+// Returns a saver reading from this store's dictionary and data store.
+template <typename EntryT, typename RefT>
+UniqueStoreSaver<EntryT, RefT>
+UniqueStore<EntryT, RefT>::getSaver() const
+{
+ return UniqueStoreSaver<EntryType, RefType>(_dict, _store);
+}
+
+// Returns the number of entries in the frozen view of the dictionary.
+// NOTE(review): changes made since the last freeze() are presumably not
+// reflected — confirm.
+template <typename EntryT, typename RefT>
+uint32_t
+UniqueStore<EntryT, RefT>::getNumUniques() const
+{
+ return _dict.getFrozenView().size();
+}
+
}
}
diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store_builder.h b/searchlib/src/vespa/searchlib/datastore/unique_store_builder.h
new file mode 100644
index 00000000000..07e8ad3a218
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/datastore/unique_store_builder.h
@@ -0,0 +1,47 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "unique_store.h"
+
+namespace search {
+namespace datastore {
+
+/**
+ * Builder for related UniqueStore class.
+ *
+ * Contains utility method for adding new unique values and mapping from
+ * enum value to EntryRef value.
+ */
+template <typename EntryT, typename RefT>
+class UniqueStoreBuilder {
+    using UniqueStoreType = UniqueStore<EntryT, RefT>;
+    using DataStoreType = typename UniqueStoreType::DataStoreType;
+    using Dictionary = typename UniqueStoreType::Dictionary;
+    using EntryType = EntryT;
+    using RefType = RefT;
+
+    DataStoreType &_store;
+    uint32_t _typeId;
+    Dictionary &_dict;
+    // Entry refs indexed by enum value.
+    std::vector<EntryRef> _refs;
+    // Reference count per enum value; sized by setupRefCounts().
+    std::vector<uint32_t> _refCounts;
+public:
+    UniqueStoreBuilder(DataStoreType &store, uint32_t typeId,
+                       Dictionary &dict, uint32_t uniqueValuesHint);
+    ~UniqueStoreBuilder();
+    // Sizes the reference counts to match the values added so far. Call
+    // after the last add() and before mapEnumValueToEntryRef().
+    void setupRefCounts();
+    // Builds the dictionary from the values that ended up being referenced.
+    void makeDictionary();
+    // Allocates a new entry holding value; it gets the next enum value.
+    void add(const EntryType &value) {
+        EntryRef newRef = _store.template allocator<EntryType>(_typeId).alloc(value).ref;
+        _refs.push_back(newRef);
+    }
+    // Returns the entry ref for enumValue and counts one more reference to it.
+    EntryRef mapEnumValueToEntryRef(uint32_t enumValue) {
+        assert(enumValue < _refs.size());
+        // _refCounts is sized only by setupRefCounts(); without this check a
+        // missing or stale call would write out of bounds below.
+        assert(enumValue < _refCounts.size());
+        ++_refCounts[enumValue];
+        return _refs[enumValue];
+    }
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store_builder.hpp b/searchlib/src/vespa/searchlib/datastore/unique_store_builder.hpp
new file mode 100644
index 00000000000..1fb95a7fed7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/datastore/unique_store_builder.hpp
@@ -0,0 +1,60 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "unique_store_builder.h"
+#include "datastore.hpp"
+#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/btree/btreebuilder.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+
+namespace search {
+namespace datastore {
+
+// Sets up a builder that adds entries to store and later fills dict.
+// Index 0 of _refs is taken by an invalid EntryRef, so the values added
+// afterwards are numbered from 1.
+template <typename EntryT, typename RefT>
+UniqueStoreBuilder<EntryT, RefT>::UniqueStoreBuilder(DataStoreType &store, uint32_t typeId, Dictionary &dict, uint32_t uniqueValuesHint)
+    : _store(store),
+      _typeId(typeId),
+      _dict(dict),
+      _refs(),
+      _refCounts()
+{
+    // Reserve one extra slot for the invalid entry pushed below, so an exact
+    // hint does not force a reallocation. Widen first so the sum cannot wrap.
+    _refs.reserve(static_cast<size_t>(uniqueValuesHint) + 1);
+    _refs.push_back(EntryRef());
+}
+
+// Members release their own resources.
+template <typename EntryT, typename RefT>
+UniqueStoreBuilder<EntryT, RefT>::~UniqueStoreBuilder() = default;
+
+// Resizes the reference count vector to one counter per entry in _refs.
+template <typename EntryT, typename RefT>
+void
+UniqueStoreBuilder<EntryT, RefT>::setupRefCounts()
+{
+ _refCounts.resize(_refs.size());
+}
+
+
+// Builds the dictionary from the added values: each value with a non-zero
+// reference count is inserted with that count, and each unreferenced value
+// is handed to the store through holdElem().
+template <typename EntryT, typename RefT>
+void
+UniqueStoreBuilder<EntryT, RefT>::makeDictionary()
+{
+ assert(_refs.size() == _refCounts.size());
+ assert(!_refs.empty());
+ typename Dictionary::Builder builder(_dict.getAllocator());
+ // Start at 1: index 0 is skipped.
+ for (size_t i = 1; i < _refs.size(); ++i) {
+ if (_refCounts[i] != 0u) {
+ builder.insert(_refs[i], _refCounts[i]);
+ } else {
+ _store.holdElem(_refs[i], 1);
+ }
+ }
+ _dict.assign(builder);
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store_saver.h b/searchlib/src/vespa/searchlib/datastore/unique_store_saver.h
new file mode 100644
index 00000000000..530d36bc9d8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/datastore/unique_store_saver.h
@@ -0,0 +1,53 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "unique_store.h"
+
+namespace search {
+namespace datastore {
+
+/**
+ * Saver for related UniqueStore class.
+ *
+ * Contains utility methods for traversing all unique values (as
+ * EntryRef value) and mapping from EntryRef value to enum value.
+ */
+template <typename EntryT, typename RefT>
+class UniqueStoreSaver {
+    using UniqueStoreType = UniqueStore<EntryT, RefT>;
+    using Dictionary = typename UniqueStoreType::Dictionary;
+    using ConstIterator = typename Dictionary::ConstIterator;
+    using EntryType = EntryT;
+    using RefType = RefT;
+
+    // Iterator positioned at the start of the dictionary's frozen view.
+    ConstIterator _itr;
+    const DataStoreBase &_store;
+    // Enum value per [buffer id][offset]; filled in by enumerateValues().
+    std::vector<std::vector<uint32_t>> _enumValues;
+public:
+    UniqueStoreSaver(const Dictionary &dict, const DataStoreBase &store);
+    ~UniqueStoreSaver();
+    // Assigns enum values, starting at 1, to the keys in iteration order.
+    void enumerateValues();
+
+    // Calls func for every key reachable from the saved iterator.
+    template <typename Function>
+    void
+    foreach_key(Function &&func) const
+    {
+        _itr.foreach_key(func);
+    }
+
+    // Returns the enum value assigned to ref, or 0 for an invalid ref.
+    // enumerateValues() must have been called before mapping a valid ref.
+    uint32_t mapEntryRefToEnumValue(EntryRef ref) const {
+        if (ref.valid()) {
+            RefType iRef(ref);
+            // _enumValues is sized only by enumerateValues(); check the outer
+            // index before it is used by the checks and the lookup below.
+            assert(iRef.bufferId() < _enumValues.size());
+            assert(iRef.offset() < _enumValues[iRef.bufferId()].size());
+            uint32_t enumValue = _enumValues[iRef.bufferId()][iRef.offset()];
+            assert(enumValue != 0);
+            return enumValue;
+        } else {
+            return 0u;
+        }
+    }
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store_saver.hpp b/searchlib/src/vespa/searchlib/datastore/unique_store_saver.hpp
new file mode 100644
index 00000000000..64b8338f88b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/datastore/unique_store_saver.hpp
@@ -0,0 +1,49 @@
+// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "unique_store_saver.h"
+
+namespace search {
+namespace datastore {
+
+// Keeps a reference to the data store and positions the iterator at the
+// beginning of the dictionary's frozen view.
+template <typename EntryT, typename RefT>
+UniqueStoreSaver<EntryT, RefT>::UniqueStoreSaver(const Dictionary &dict, const DataStoreBase &store)
+ : _itr(),
+ _store(store)
+{
+ _itr = dict.getFrozenView().begin();
+}
+
+// Members release their own resources.
+template <typename EntryT, typename RefT>
+UniqueStoreSaver<EntryT, RefT>::~UniqueStoreSaver() = default;
+
+// Assigns consecutive enum values, starting at 1, to the dictionary keys in
+// iteration order and records them per buffer id and offset.
+template <typename EntryT, typename RefT>
+void
+UniqueStoreSaver<EntryT, RefT>::enumerateValues()
+{
+ // One inner vector per buffer; only active buffers get room for values.
+ _enumValues.resize(RefType::numBuffers());
+ for (uint32_t bufferId = 0; bufferId < RefType::numBuffers(); ++bufferId) {
+ const BufferState &state = _store.getBufferState(bufferId);
+ if (state.isActive()) {
+ _enumValues[bufferId].resize(state.size());
+ }
+ }
+ // Walk a copy so the stored iterator stays at the start.
+ ConstIterator it = _itr;
+ uint32_t nextEnumVal = 1;
+ while (it.valid()) {
+ RefType ref(it.getKey());
+ assert(ref.valid());
+ assert(ref.offset() < _enumValues[ref.bufferId()].size());
+ uint32_t &enumVal = _enumValues[ref.bufferId()][ref.offset()];
+ // Each slot is expected to be assigned at most once.
+ assert(enumVal == 0u);
+ enumVal = nextEnumVal;
+ ++it;
+ ++nextEnumVal;
+ }
+}
+
+}
+}