summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2020-03-27 16:58:19 +0000
committer Geir Storli <geirst@verizonmedia.com> 2020-03-27 16:58:19 +0000
commit 7be3fed7f10a649b7e27caa5dc113903bb63791b (patch)
tree 0f7a9b23dafbf81a45b82aacc811fbe94219f857 /searchlib
parent 2cf7d6095c963af94b9de9c0b05ee93f9747e19d (diff)
Extend attribute save targets to support custom file writers.
This is a preparation for saving and loading nearest neighbor index in the tensor attribute code.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp 11
-rw-r--r-- searchlib/src/tests/attribute/save_target/CMakeLists.txt 9
-rw-r--r-- searchlib/src/tests/attribute/save_target/attribute_save_target_test.cpp 148
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_header.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_header.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp 52
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h 23
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp 45
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h 24
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h 13
11 files changed, 304 insertions, 30 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index aaf8f91387e..055dfc6645e 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -90,6 +90,7 @@ vespa_define_module(
src/tests/attribute/postinglist
src/tests/attribute/postinglistattribute
src/tests/attribute/reference_attribute
+ src/tests/attribute/save_target
src/tests/attribute/searchable
src/tests/attribute/searchcontext
src/tests/attribute/sourceselector
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
index bf829f6607a..41313fc7c53 100644
--- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
+++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
@@ -108,6 +108,17 @@ public:
}
IAttributeFileWriter &udatWriter() override { return _udatWriter; }
+    // Custom writers are not supported by this test save target:
+    // calling this override aborts the process.
+    bool setup_writer(const vespalib::string& file_suffix,
+                      const vespalib::string& desc) override
+    {
+        static_cast<void>(file_suffix);
+        static_cast<void>(desc);
+        abort();
+    }
+    // No custom writer can exist in this test save target:
+    // calling this override aborts the process.
+    IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) override
+    {
+        static_cast<void>(file_suffix);
+        abort();
+    }
+
bool bufEqual(const Buffer &lhs, const Buffer &rhs) const;
bool operator==(const MemAttr &rhs) const;
diff --git a/searchlib/src/tests/attribute/save_target/CMakeLists.txt b/searchlib/src/tests/attribute/save_target/CMakeLists.txt
new file mode 100644
index 00000000000..e127f66579e
--- /dev/null
+++ b/searchlib/src/tests/attribute/save_target/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attribute_save_target_test_app TEST
+ SOURCES
+ attribute_save_target_test.cpp
+ DEPENDS
+ searchlib
+ gtest
+)
+vespa_add_test(NAME searchlib_attribute_save_target_test_app COMMAND searchlib_attribute_save_target_test_app)
diff --git a/searchlib/src/tests/attribute/save_target/attribute_save_target_test.cpp b/searchlib/src/tests/attribute/save_target/attribute_save_target_test.cpp
new file mode 100644
index 00000000000..c746a0aa120
--- /dev/null
+++ b/searchlib/src/tests/attribute/save_target/attribute_save_target_test.cpp
@@ -0,0 +1,148 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/attribute/attributefilesavetarget.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/test/directory_handler.h>
+#include <vespa/searchlib/util/fileutil.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/util/bufferwriter.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_save_target_test");
+
+using namespace search;
+using namespace search::attribute;
+
+using search::index::DummyFileHeaderContext;
+using search::test::DirectoryHandler;
+
+const vespalib::string test_dir = "test_data/";
+
+/**
+ * Shared fixture for the save target tests.
+ *
+ * Holds a reference to the save target under test (owned by the subclass)
+ * and the base file name that the custom writer file names are derived from.
+ */
+class SaveTargetTest : public ::testing::Test {
+public:
+    DirectoryHandler dir_handler;
+    TuneFileAttributes tune_file;
+    DummyFileHeaderContext file_header_ctx;
+    IAttributeSaveTarget& target;
+    vespalib::string base_file_name;
+
+    explicit SaveTargetTest(IAttributeSaveTarget& target_in)
+        : dir_handler(test_dir),
+          tune_file(),
+          file_header_ctx(),
+          target(target_in),
+          base_file_name(test_dir + "test_file")
+    {
+    }
+    ~SaveTargetTest() override = default;
+
+    // Installs an attribute header carrying only the given file name.
+    void set_header(const vespalib::string& file_name) {
+        target.setHeader(AttributeHeader(file_name));
+    }
+
+    // Sets up a custom writer and returns it.
+    // EXPECT_TRUE is used instead of assert() so the check is also active in
+    // NDEBUG builds. If no writer is registered for the suffix, get_writer()
+    // throws and the test fails on that exception.
+    IAttributeFileWriter& setup_writer(const vespalib::string& file_suffix,
+                                       const vespalib::string& desc) {
+        EXPECT_TRUE(target.setup_writer(file_suffix, desc));
+        return target.get_writer(file_suffix);
+    }
+
+    // Sets up a custom writer and writes a single int as its payload.
+    void setup_writer_and_fill(const vespalib::string& file_suffix,
+                               const vespalib::string& desc,
+                               int value) {
+        auto& writer = setup_writer(file_suffix, desc);
+        auto buf = writer.allocBufferWriter();
+        buf->write(&value, sizeof(int));
+        buf->flush();
+    }
+
+    // Loads "<base_file_name>.<file_suffix>" and checks the header tags and
+    // the single int payload written by setup_writer_and_fill().
+    void validate_loaded_file(const vespalib::string& file_suffix,
+                              const vespalib::string& exp_desc,
+                              int exp_value)
+    {
+        vespalib::string file_name = base_file_name + "." + file_suffix;
+        // Fatal: there is nothing to load or inspect if the file is missing.
+        ASSERT_TRUE(vespalib::fileExists(file_name));
+        auto loaded = FileUtil::loadFile(file_name);
+        EXPECT_FALSE(loaded->empty());
+
+        const auto& header = loaded->getHeader();
+        EXPECT_EQ(file_name, header.getTag("fileName").asString());
+        EXPECT_EQ(exp_desc, header.getTag("desc").asString());
+
+        // Fatal: the payload must be exactly one int before it is read back.
+        ASSERT_EQ(sizeof(int), loaded->size());
+        int act_value = (reinterpret_cast<const int*>(loaded->buffer()))[0];
+        EXPECT_EQ(exp_value, act_value);
+    }
+};
+
+// Fixture that runs the shared helpers against a file based save target.
+class FileSaveTargetTest : public SaveTargetTest {
+public:
+ AttributeFileSaveTarget file_target;
+
+ // The base class is handed a reference to file_target before that member is
+ // constructed; the base constructor only stores the reference, and the first
+ // use is set_header() in the constructor body below.
+ FileSaveTargetTest()
+ : SaveTargetTest(file_target),
+ file_target(tune_file, file_header_ctx)
+ {
+ set_header(base_file_name);
+ }
+};
+
+// Two custom writers are set up and filled; after close() each suffix has its
+// own file with the expected description and value.
+TEST_F(FileSaveTargetTest, can_setup_and_return_writers)
+{
+ setup_writer_and_fill("my1", "desc 1", 123);
+ setup_writer_and_fill("my2", "desc 2", 456);
+ target.close();
+
+ validate_loaded_file("my1", "desc 1", 123);
+ validate_loaded_file("my2", "desc 2", 456);
+}
+
+// A second setup_writer() call with an already used suffix must return false.
+TEST_F(FileSaveTargetTest, setup_fails_if_writer_already_exists)
+{
+ setup_writer("my", "my desc");
+ EXPECT_FALSE(target.setup_writer("my", "my desc"));
+}
+
+// Asking for a writer that was never set up must throw IllegalArgumentException.
+TEST_F(FileSaveTargetTest, get_throws_if_writer_does_not_exists)
+{
+ EXPECT_THROW(target.get_writer("na"), vespalib::IllegalArgumentException);
+}
+
+/**
+ * Fixture that runs the shared helpers against a memory based save target.
+ */
+class MemorySaveTargetTest : public SaveTargetTest {
+public:
+    AttributeMemorySaveTarget memory_target;
+
+    MemorySaveTargetTest()
+        : SaveTargetTest(memory_target),
+          memory_target()
+    {
+        set_header(base_file_name);
+    }
+
+    // Writes the buffered content to file and requires that this succeeds.
+    // Note: a failing ASSERT_TRUE only returns from this helper, not from
+    // the calling test body.
+    void write_to_file() {
+        ASSERT_TRUE(memory_target.writeToFile(tune_file, file_header_ctx));
+    }
+};
+
+// Two custom writers are set up and filled in memory; after write_to_file()
+// each suffix has its own file with the expected description and value.
+TEST_F(MemorySaveTargetTest, can_setup_and_return_writers)
+{
+ setup_writer_and_fill("my1", "desc 1", 123);
+ setup_writer_and_fill("my2", "desc 2", 456);
+ write_to_file();
+
+ validate_loaded_file("my1", "desc 1", 123);
+ validate_loaded_file("my2", "desc 2", 456);
+}
+
+// A second setup_writer() call with an already used suffix must return false.
+TEST_F(MemorySaveTargetTest, setup_fails_if_writer_already_exists)
+{
+ setup_writer("my", "my desc");
+ EXPECT_FALSE(target.setup_writer("my", "my desc"));
+}
+
+// Asking for a writer that was never set up must throw IllegalArgumentException.
+TEST_F(MemorySaveTargetTest, get_throws_if_writer_does_not_exists)
+{
+ EXPECT_THROW(target.get_writer("na"), vespalib::IllegalArgumentException);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
index 224d5758028..3d7010ba6c3 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
@@ -22,7 +22,12 @@ const vespalib::string predicateUpperBoundTag = "predicate.upper_bound";
}
AttributeHeader::AttributeHeader()
- : _fileName(""),
+ : AttributeHeader("")
+{
+}
+
+AttributeHeader::AttributeHeader(const vespalib::string &fileName)
+ : _fileName(fileName),
_basicType(attribute::BasicType::Type::NONE),
_collectionType(attribute::CollectionType::Type::SINGLE),
_tensorType(vespalib::eval::ValueType::error_type()),
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.h b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
index 303c469e755..24eac8336b4 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
@@ -35,6 +35,7 @@ private:
void internalExtractTags(const vespalib::GenericHeader &header);
public:
AttributeHeader();
+ AttributeHeader(const vespalib::string &fileName);
AttributeHeader(const vespalib::string &fileName,
BasicType basicType,
CollectionType collectionType,
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp
index f57094ae592..f284fecbf98 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp
@@ -3,14 +3,16 @@
#include "attributefilesavetarget.h"
#include "attributevector.h"
#include <vespa/searchlib/common/fileheadercontext.h>
-#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/util/error.h>
+#include <vespa/vespalib/util/exceptions.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.attribute.attributefilesavetarget");
using vespalib::getLastErrorString;
+using vespalib::IllegalArgumentException;
namespace search {
@@ -18,13 +20,16 @@ using common::FileHeaderContext;
AttributeFileSaveTarget::
-AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes,
- const FileHeaderContext &fileHeaderContext)
+AttributeFileSaveTarget(const TuneFileAttributes& tune_file,
+ const FileHeaderContext& file_header_ctx)
: IAttributeSaveTarget(),
- _datWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector data file"),
- _idxWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector idx file"),
- _weightWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector weight file"),
- _udatWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector unique data file")
+ _tune_file(tune_file),
+ _file_header_ctx(file_header_ctx),
+ _datWriter(tune_file, file_header_ctx, _header, "Attribute vector data file"),
+ _idxWriter(tune_file, file_header_ctx, _header, "Attribute vector idx file"),
+ _weightWriter(tune_file, file_header_ctx, _header, "Attribute vector weight file"),
+ _udatWriter(tune_file, file_header_ctx, _header, "Attribute vector unique data file"),
+ _writers()
{
}
@@ -66,23 +71,23 @@ AttributeFileSaveTarget::close()
_udatWriter.close();
_idxWriter.close();
_weightWriter.close();
+ for (auto& writer : _writers) {
+ writer.second->close();
+ }
}
-
IAttributeFileWriter &
AttributeFileSaveTarget::datWriter()
{
return _datWriter;
}
-
IAttributeFileWriter &
AttributeFileSaveTarget::idxWriter()
{
return _idxWriter;
}
-
IAttributeFileWriter &
AttributeFileSaveTarget::weightWriter()
{
@@ -95,6 +100,33 @@ AttributeFileSaveTarget::udatWriter()
return _udatWriter;
}
+/**
+ * Sets up a custom file writer for the file "<header file name>.<file_suffix>",
+ * using the given description for the writer.
+ *
+ * Returns false if a writer with this suffix is already registered or if the
+ * file cannot be opened, true otherwise.
+ */
+bool
+AttributeFileSaveTarget::setup_writer(const vespalib::string& file_suffix,
+                                      const vespalib::string& desc)
+{
+    // Reject duplicates before touching the file system: a repeated request
+    // must not open a second writer on the file name that the already
+    // registered writer is still using.
+    if (_writers.find(file_suffix) != _writers.end()) {
+        return false;
+    }
+    vespalib::string file_name(_header.getFileName() + "." + file_suffix);
+    auto writer = std::make_unique<AttributeFileWriter>(_tune_file, _file_header_ctx,
+                                                        _header, desc);
+    if (!writer->open(file_name)) {
+        return false;
+    }
+    _writers.emplace(file_suffix, std::move(writer));
+    return true;
+}
+
+/**
+ * Returns the custom file writer registered for the given file suffix.
+ * Throws IllegalArgumentException if no such writer has been set up.
+ */
+IAttributeFileWriter&
+AttributeFileSaveTarget::get_writer(const vespalib::string& file_suffix)
+{
+    const auto found = _writers.find(file_suffix);
+    if (found != _writers.end()) {
+        return *found->second;
+    }
+    throw IllegalArgumentException("File writer with suffix '" + file_suffix + "' does not exist");
+}
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h
index acb3daf82e0..9a9d38615ea 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h
@@ -4,24 +4,30 @@
#include "iattributesavetarget.h"
#include "attributefilewriter.h"
+#include <vespa/vespalib/stllike/hash_fun.h>
+#include <unordered_map>
-namespace search
-{
+namespace search {
/**
* Class used to save an attribute vector to file(s).
**/
-class AttributeFileSaveTarget : public IAttributeSaveTarget
-{
+class AttributeFileSaveTarget : public IAttributeSaveTarget {
private:
+ using FileWriterUP = std::unique_ptr<AttributeFileWriter>;
+ using WriterMap = std::unordered_map<vespalib::string, FileWriterUP, vespalib::hash<vespalib::string>>;
+
+ const TuneFileAttributes& _tune_file;
+ const search::common::FileHeaderContext& _file_header_ctx;
AttributeFileWriter _datWriter;
AttributeFileWriter _idxWriter;
AttributeFileWriter _weightWriter;
AttributeFileWriter _udatWriter;
+ WriterMap _writers;
public:
- AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes,
- const search::common::FileHeaderContext &fileHeaderContext);
+ AttributeFileSaveTarget(const TuneFileAttributes& tune_file,
+ const search::common::FileHeaderContext& file_header_ctx);
~AttributeFileSaveTarget() override;
// Implements IAttributeSaveTarget
@@ -35,6 +41,11 @@ public:
IAttributeFileWriter &idxWriter() override;
IAttributeFileWriter &weightWriter() override;
IAttributeFileWriter &udatWriter() override;
+
+ bool setup_writer(const vespalib::string& file_suffix,
+ const vespalib::string& desc) override;
+ IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) override;
+
};
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp
index 372168143ab..b28887691e5 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp
@@ -1,24 +1,25 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "attributememorysavetarget.h"
#include "attributefilesavetarget.h"
+#include "attributememorysavetarget.h"
#include "attributevector.h"
+#include <vespa/vespalib/util/exceptions.h>
namespace search {
using search::common::FileHeaderContext;
+using vespalib::IllegalArgumentException;
AttributeMemorySaveTarget::AttributeMemorySaveTarget()
: _datWriter(),
_idxWriter(),
_weightWriter(),
- _udatWriter()
+ _udatWriter(),
+ _writers()
{
}
-AttributeMemorySaveTarget::~AttributeMemorySaveTarget() {
-}
-
+AttributeMemorySaveTarget::~AttributeMemorySaveTarget() = default;
IAttributeFileWriter &
AttributeMemorySaveTarget::datWriter()
@@ -26,28 +27,24 @@ AttributeMemorySaveTarget::datWriter()
return _datWriter;
}
-
IAttributeFileWriter &
AttributeMemorySaveTarget::idxWriter()
{
return _idxWriter;
}
-
IAttributeFileWriter &
AttributeMemorySaveTarget::weightWriter()
{
return _weightWriter;
}
-
IAttributeFileWriter &
AttributeMemorySaveTarget::udatWriter()
{
return _udatWriter;
}
-
bool
AttributeMemorySaveTarget::
writeToFile(const TuneFileAttributes &tuneFileAttributes,
@@ -68,9 +65,39 @@ writeToFile(const TuneFileAttributes &tuneFileAttributes,
_weightWriter.writeTo(saveTarget.weightWriter());
}
}
+ for (const auto& entry : _writers) {
+ if (!saveTarget.setup_writer(entry.first, entry.second.desc)) {
+ return false;
+ }
+ auto& file_writer = saveTarget.get_writer(entry.first);
+ entry.second.writer->writeTo(file_writer);
+ }
saveTarget.close();
return true;
}
+/**
+ * Sets up a custom in-memory writer for the given file suffix and remembers
+ * the description together with it.
+ *
+ * Returns false if a writer with this suffix is already registered,
+ * true otherwise.
+ */
+bool
+AttributeMemorySaveTarget::setup_writer(const vespalib::string& file_suffix,
+                                        const vespalib::string& desc)
+{
+    // Check for a duplicate first so that no writer is allocated just to be
+    // thrown away again.
+    if (_writers.find(file_suffix) != _writers.end()) {
+        return false;
+    }
+    _writers.emplace(file_suffix,
+                     WriterEntry(std::make_unique<AttributeMemoryFileWriter>(), desc));
+    return true;
+}
+
+/**
+ * Returns the custom in-memory writer registered for the given file suffix.
+ * Throws IllegalArgumentException if no such writer has been set up.
+ */
+IAttributeFileWriter&
+AttributeMemorySaveTarget::get_writer(const vespalib::string& file_suffix)
+{
+    const auto found = _writers.find(file_suffix);
+    if (found != _writers.end()) {
+        return *found->second.writer;
+    }
+    throw IllegalArgumentException("File writer with suffix '" + file_suffix + "' does not exist");
+}
+
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h
index f06764fa34b..9533b881099 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h
@@ -2,11 +2,13 @@
#pragma once
-#include "iattributesavetarget.h"
#include "attributememoryfilewriter.h"
+#include "iattributesavetarget.h"
+#include <vespa/searchlib/common/tunefileinfo.h>
#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/stllike/hash_fun.h>
#include <memory>
-#include <vespa/searchlib/common/tunefileinfo.h>
+#include <unordered_map>
namespace search::common { class FileHeaderContext; }
@@ -16,13 +18,22 @@ class AttributeVector;
/**
* Class used to save an attribute vector to memory buffer(s).
**/
-class AttributeMemorySaveTarget : public IAttributeSaveTarget
-{
+class AttributeMemorySaveTarget : public IAttributeSaveTarget {
private:
+ using FileWriterUP = std::unique_ptr<AttributeMemoryFileWriter>;
+ // Pairs a custom in-memory writer with the description given when it was
+ // set up.
+ struct WriterEntry {
+ FileWriterUP writer;
+ vespalib::string desc;
+ // Takes ownership of the writer and copies the description.
+ WriterEntry(FileWriterUP writer_in, const vespalib::string& desc_in)
+ : writer(std::move(writer_in)), desc(desc_in) {}
+ };
+ using WriterMap = std::unordered_map<vespalib::string, WriterEntry, vespalib::hash<vespalib::string>>;
+
AttributeMemoryFileWriter _datWriter;
AttributeMemoryFileWriter _idxWriter;
AttributeMemoryFileWriter _weightWriter;
AttributeMemoryFileWriter _udatWriter;
+ WriterMap _writers;
public:
AttributeMemorySaveTarget();
@@ -40,6 +51,11 @@ public:
IAttributeFileWriter &idxWriter() override;
IAttributeFileWriter &weightWriter() override;
IAttributeFileWriter &udatWriter() override;
+
+ bool setup_writer(const vespalib::string& file_suffix,
+ const vespalib::string& desc) override;
+ IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) override;
+
};
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h
index 9f90544bb83..8946fc2fcdb 100644
--- a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h
+++ b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h
@@ -37,6 +37,19 @@ public:
virtual IAttributeFileWriter &weightWriter() = 0;
virtual IAttributeFileWriter &udatWriter() = 0;
+ /**
+ * Sets up a custom file writer with the given file suffix and description in the file header.
+ * Returns false if the file writer cannot be set up or if it already exists, true otherwise.
+ */
+ virtual bool setup_writer(const vespalib::string& file_suffix,
+ const vespalib::string& desc) = 0;
+
+ /**
+ * Returns the file writer with the given file suffix.
+ * Throws vespalib::IllegalArgumentException if the file writer does not exist.
+ */
+ virtual IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) = 0;
+
virtual ~IAttributeSaveTarget();
};