diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-03-27 16:58:19 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2020-03-27 16:58:19 +0000 |
commit | 7be3fed7f10a649b7e27caa5dc113903bb63791b (patch) | |
tree | 0f7a9b23dafbf81a45b82aacc811fbe94219f857 /searchlib | |
parent | 2cf7d6095c963af94b9de9c0b05ee93f9747e19d (diff) |
Extend attribute save targets to support custom file writers.
This is in preparation for saving and loading the nearest neighbor index in the tensor attribute code.
Diffstat (limited to 'searchlib')
11 files changed, 304 insertions, 30 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index aaf8f91387e..055dfc6645e 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -90,6 +90,7 @@ vespa_define_module( src/tests/attribute/postinglist src/tests/attribute/postinglistattribute src/tests/attribute/reference_attribute + src/tests/attribute/save_target src/tests/attribute/searchable src/tests/attribute/searchcontext src/tests/attribute/sourceselector diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index bf829f6607a..41313fc7c53 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -108,6 +108,17 @@ public: } IAttributeFileWriter &udatWriter() override { return _udatWriter; } + bool setup_writer(const vespalib::string& file_suffix, + const vespalib::string& desc) override { + (void) file_suffix; + (void) desc; + abort(); + } + IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) override { + (void) file_suffix; + abort(); + } + bool bufEqual(const Buffer &lhs, const Buffer &rhs) const; bool operator==(const MemAttr &rhs) const; diff --git a/searchlib/src/tests/attribute/save_target/CMakeLists.txt b/searchlib/src/tests/attribute/save_target/CMakeLists.txt new file mode 100644 index 00000000000..e127f66579e --- /dev/null +++ b/searchlib/src/tests/attribute/save_target/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_attribute_save_target_test_app TEST + SOURCES + attribute_save_target_test.cpp + DEPENDS + searchlib + gtest +) +vespa_add_test(NAME searchlib_attribute_save_target_test_app COMMAND searchlib_attribute_save_target_test_app) diff --git a/searchlib/src/tests/attribute/save_target/attribute_save_target_test.cpp b/searchlib/src/tests/attribute/save_target/attribute_save_target_test.cpp new file mode 100644 index 00000000000..c746a0aa120 --- /dev/null +++ b/searchlib/src/tests/attribute/save_target/attribute_save_target_test.cpp @@ -0,0 +1,148 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/attribute/attributefilesavetarget.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/test/directory_handler.h> +#include <vespa/searchlib/util/fileutil.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/util/bufferwriter.h> +#include <vespa/vespalib/util/exceptions.h> + +#include <vespa/log/log.h> +LOG_SETUP("attribute_save_target_test"); + +using namespace search; +using namespace search::attribute; + +using search::index::DummyFileHeaderContext; +using search::test::DirectoryHandler; + +const vespalib::string test_dir = "test_data/"; + +class SaveTargetTest : public ::testing::Test { +public: + DirectoryHandler dir_handler; + TuneFileAttributes tune_file; + DummyFileHeaderContext file_header_ctx; + IAttributeSaveTarget& target; + vespalib::string base_file_name; + + SaveTargetTest(IAttributeSaveTarget& target_in) + : dir_handler(test_dir), + tune_file(), + file_header_ctx(), + target(target_in), + base_file_name(test_dir + "test_file") + { + } + ~SaveTargetTest() {} + void set_header(const vespalib::string& file_name) { + target.setHeader(AttributeHeader(file_name)); + } + 
IAttributeFileWriter& setup_writer(const vespalib::string& file_suffix, + const vespalib::string& desc) { + bool res = target.setup_writer(file_suffix, desc); + assert(res); + return target.get_writer(file_suffix); + } + void setup_writer_and_fill(const vespalib::string& file_suffix, + const vespalib::string& desc, + int value) { + auto& writer = setup_writer(file_suffix, desc); + auto buf = writer.allocBufferWriter(); + buf->write(&value, sizeof(int)); + buf->flush(); + } + void validate_loaded_file(const vespalib::string& file_suffix, + const vespalib::string& exp_desc, + int exp_value) + { + vespalib::string file_name = base_file_name + "." + file_suffix; + EXPECT_TRUE(vespalib::fileExists(file_name)); + auto loaded = FileUtil::loadFile(file_name); + EXPECT_FALSE(loaded->empty()); + + const auto& header = loaded->getHeader(); + EXPECT_EQ(file_name, header.getTag("fileName").asString()); + EXPECT_EQ(exp_desc, header.getTag("desc").asString()); + + EXPECT_EQ(sizeof(int), loaded->size()); + int act_value = (reinterpret_cast<const int*>(loaded->buffer()))[0]; + EXPECT_EQ(exp_value, act_value); + } +}; + +class FileSaveTargetTest : public SaveTargetTest { +public: + AttributeFileSaveTarget file_target; + + FileSaveTargetTest() + : SaveTargetTest(file_target), + file_target(tune_file, file_header_ctx) + { + set_header(base_file_name); + } +}; + +TEST_F(FileSaveTargetTest, can_setup_and_return_writers) +{ + setup_writer_and_fill("my1", "desc 1", 123); + setup_writer_and_fill("my2", "desc 2", 456); + target.close(); + + validate_loaded_file("my1", "desc 1", 123); + validate_loaded_file("my2", "desc 2", 456); +} + +TEST_F(FileSaveTargetTest, setup_fails_if_writer_already_exists) +{ + setup_writer("my", "my desc"); + EXPECT_FALSE(target.setup_writer("my", "my desc")); +} + +TEST_F(FileSaveTargetTest, get_throws_if_writer_does_not_exists) +{ + EXPECT_THROW(target.get_writer("na"), vespalib::IllegalArgumentException); +} + +class MemorySaveTargetTest : public SaveTargetTest 
{ +public: + AttributeMemorySaveTarget memory_target; + + MemorySaveTargetTest() + : SaveTargetTest(memory_target), + memory_target() + { + set_header(base_file_name); + } + void write_to_file() { + bool res = memory_target.writeToFile(tune_file, file_header_ctx); + ASSERT_TRUE(res); + } +}; + +TEST_F(MemorySaveTargetTest, can_setup_and_return_writers) +{ + setup_writer_and_fill("my1", "desc 1", 123); + setup_writer_and_fill("my2", "desc 2", 456); + write_to_file(); + + validate_loaded_file("my1", "desc 1", 123); + validate_loaded_file("my2", "desc 2", 456); +} + +TEST_F(MemorySaveTargetTest, setup_fails_if_writer_already_exists) +{ + setup_writer("my", "my desc"); + EXPECT_FALSE(target.setup_writer("my", "my desc")); +} + +TEST_F(MemorySaveTargetTest, get_throws_if_writer_does_not_exists) +{ + EXPECT_THROW(target.get_writer("na"), vespalib::IllegalArgumentException); +} + +GTEST_MAIN_RUN_ALL_TESTS() + diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp index 224d5758028..3d7010ba6c3 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp @@ -22,7 +22,12 @@ const vespalib::string predicateUpperBoundTag = "predicate.upper_bound"; } AttributeHeader::AttributeHeader() - : _fileName(""), + : AttributeHeader("") +{ +} + +AttributeHeader::AttributeHeader(const vespalib::string &fileName) + : _fileName(fileName), _basicType(attribute::BasicType::Type::NONE), _collectionType(attribute::CollectionType::Type::SINGLE), _tensorType(vespalib::eval::ValueType::error_type()), diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.h b/searchlib/src/vespa/searchlib/attribute/attribute_header.h index 303c469e755..24eac8336b4 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.h +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.h @@ -35,6 +35,7 @@ private: void 
internalExtractTags(const vespalib::GenericHeader &header); public: AttributeHeader(); + AttributeHeader(const vespalib::string &fileName); AttributeHeader(const vespalib::string &fileName, BasicType basicType, CollectionType collectionType, diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp index f57094ae592..f284fecbf98 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp @@ -3,14 +3,16 @@ #include "attributefilesavetarget.h" #include "attributevector.h" #include <vespa/searchlib/common/fileheadercontext.h> -#include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/data/databuffer.h> +#include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/util/error.h> +#include <vespa/vespalib/util/exceptions.h> #include <vespa/log/log.h> LOG_SETUP(".searchlib.attribute.attributefilesavetarget"); using vespalib::getLastErrorString; +using vespalib::IllegalArgumentException; namespace search { @@ -18,13 +20,16 @@ using common::FileHeaderContext; AttributeFileSaveTarget:: -AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes, - const FileHeaderContext &fileHeaderContext) +AttributeFileSaveTarget(const TuneFileAttributes& tune_file, + const FileHeaderContext& file_header_ctx) : IAttributeSaveTarget(), - _datWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector data file"), - _idxWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector idx file"), - _weightWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector weight file"), - _udatWriter(tuneFileAttributes, fileHeaderContext, _header, "Attribute vector unique data file") + _tune_file(tune_file), + _file_header_ctx(file_header_ctx), + _datWriter(tune_file, file_header_ctx, _header, "Attribute vector data file"), + _idxWriter(tune_file, 
file_header_ctx, _header, "Attribute vector idx file"), + _weightWriter(tune_file, file_header_ctx, _header, "Attribute vector weight file"), + _udatWriter(tune_file, file_header_ctx, _header, "Attribute vector unique data file"), + _writers() { } @@ -66,23 +71,23 @@ AttributeFileSaveTarget::close() _udatWriter.close(); _idxWriter.close(); _weightWriter.close(); + for (auto& writer : _writers) { + writer.second->close(); + } } - IAttributeFileWriter & AttributeFileSaveTarget::datWriter() { return _datWriter; } - IAttributeFileWriter & AttributeFileSaveTarget::idxWriter() { return _idxWriter; } - IAttributeFileWriter & AttributeFileSaveTarget::weightWriter() { @@ -95,6 +100,33 @@ AttributeFileSaveTarget::udatWriter() return _udatWriter; } +bool +AttributeFileSaveTarget::setup_writer(const vespalib::string& file_suffix, + const vespalib::string& desc) +{ + vespalib::string file_name(_header.getFileName() + "." + file_suffix); + auto writer = std::make_unique<AttributeFileWriter>(_tune_file, _file_header_ctx, + _header, desc); + if (!writer->open(file_name)) { + return false; + } + auto itr = _writers.find(file_suffix); + if (itr != _writers.end()) { + return false; + } + _writers.insert(std::make_pair(file_suffix, std::move(writer))); + return true; +} + +IAttributeFileWriter& +AttributeFileSaveTarget::get_writer(const vespalib::string& file_suffix) +{ + auto itr = _writers.find(file_suffix); + if (itr == _writers.end()) { + throw IllegalArgumentException("File writer with suffix '" + file_suffix + "' does not exist"); + } + return *itr->second; +} } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h index acb3daf82e0..9a9d38615ea 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h +++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h @@ -4,24 +4,30 @@ #include "iattributesavetarget.h" #include 
"attributefilewriter.h" +#include <vespa/vespalib/stllike/hash_fun.h> +#include <unordered_map> -namespace search -{ +namespace search { /** * Class used to save an attribute vector to file(s). **/ -class AttributeFileSaveTarget : public IAttributeSaveTarget -{ +class AttributeFileSaveTarget : public IAttributeSaveTarget { private: + using FileWriterUP = std::unique_ptr<AttributeFileWriter>; + using WriterMap = std::unordered_map<vespalib::string, FileWriterUP, vespalib::hash<vespalib::string>>; + + const TuneFileAttributes& _tune_file; + const search::common::FileHeaderContext& _file_header_ctx; AttributeFileWriter _datWriter; AttributeFileWriter _idxWriter; AttributeFileWriter _weightWriter; AttributeFileWriter _udatWriter; + WriterMap _writers; public: - AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes, - const search::common::FileHeaderContext &fileHeaderContext); + AttributeFileSaveTarget(const TuneFileAttributes& tune_file, + const search::common::FileHeaderContext& file_header_ctx); ~AttributeFileSaveTarget() override; // Implements IAttributeSaveTarget @@ -35,6 +41,11 @@ public: IAttributeFileWriter &idxWriter() override; IAttributeFileWriter &weightWriter() override; IAttributeFileWriter &udatWriter() override; + + bool setup_writer(const vespalib::string& file_suffix, + const vespalib::string& desc) override; + IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) override; + }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp index 372168143ab..b28887691e5 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp @@ -1,24 +1,25 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "attributememorysavetarget.h" #include "attributefilesavetarget.h" +#include "attributememorysavetarget.h" #include "attributevector.h" +#include <vespa/vespalib/util/exceptions.h> namespace search { using search::common::FileHeaderContext; +using vespalib::IllegalArgumentException; AttributeMemorySaveTarget::AttributeMemorySaveTarget() : _datWriter(), _idxWriter(), _weightWriter(), - _udatWriter() + _udatWriter(), + _writers() { } -AttributeMemorySaveTarget::~AttributeMemorySaveTarget() { -} - +AttributeMemorySaveTarget::~AttributeMemorySaveTarget() = default; IAttributeFileWriter & AttributeMemorySaveTarget::datWriter() @@ -26,28 +27,24 @@ AttributeMemorySaveTarget::datWriter() return _datWriter; } - IAttributeFileWriter & AttributeMemorySaveTarget::idxWriter() { return _idxWriter; } - IAttributeFileWriter & AttributeMemorySaveTarget::weightWriter() { return _weightWriter; } - IAttributeFileWriter & AttributeMemorySaveTarget::udatWriter() { return _udatWriter; } - bool AttributeMemorySaveTarget:: writeToFile(const TuneFileAttributes &tuneFileAttributes, @@ -68,9 +65,39 @@ writeToFile(const TuneFileAttributes &tuneFileAttributes, _weightWriter.writeTo(saveTarget.weightWriter()); } } + for (const auto& entry : _writers) { + if (!saveTarget.setup_writer(entry.first, entry.second.desc)) { + return false; + } + auto& file_writer = saveTarget.get_writer(entry.first); + entry.second.writer->writeTo(file_writer); + } saveTarget.close(); return true; } +bool +AttributeMemorySaveTarget::setup_writer(const vespalib::string& file_suffix, + const vespalib::string& desc) +{ + auto writer = std::make_unique<AttributeMemoryFileWriter>(); + auto itr = _writers.find(file_suffix); + if (itr != _writers.end()) { + return false; + } + _writers.insert(std::make_pair(file_suffix, WriterEntry(std::move(writer), desc))); + return true; +} + +IAttributeFileWriter& +AttributeMemorySaveTarget::get_writer(const vespalib::string& file_suffix) +{ + auto itr = 
_writers.find(file_suffix); + if (itr == _writers.end()) { + throw IllegalArgumentException("File writer with suffix '" + file_suffix + "' does not exist"); + } + return *itr->second.writer; +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h index f06764fa34b..9533b881099 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h +++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h @@ -2,11 +2,13 @@ #pragma once -#include "iattributesavetarget.h" #include "attributememoryfilewriter.h" +#include "iattributesavetarget.h" +#include <vespa/searchlib/common/tunefileinfo.h> #include <vespa/searchlib/util/rawbuf.h> +#include <vespa/vespalib/stllike/hash_fun.h> #include <memory> -#include <vespa/searchlib/common/tunefileinfo.h> +#include <unordered_map> namespace search::common { class FileHeaderContext; } @@ -16,13 +18,22 @@ class AttributeVector; /** * Class used to save an attribute vector to memory buffer(s). 
**/ -class AttributeMemorySaveTarget : public IAttributeSaveTarget -{ +class AttributeMemorySaveTarget : public IAttributeSaveTarget { private: + using FileWriterUP = std::unique_ptr<AttributeMemoryFileWriter>; + struct WriterEntry { + FileWriterUP writer; + vespalib::string desc; + WriterEntry(FileWriterUP writer_in, const vespalib::string& desc_in) + : writer(std::move(writer_in)), desc(desc_in) {} + }; + using WriterMap = std::unordered_map<vespalib::string, WriterEntry, vespalib::hash<vespalib::string>>; + AttributeMemoryFileWriter _datWriter; AttributeMemoryFileWriter _idxWriter; AttributeMemoryFileWriter _weightWriter; AttributeMemoryFileWriter _udatWriter; + WriterMap _writers; public: AttributeMemorySaveTarget(); @@ -40,6 +51,11 @@ public: IAttributeFileWriter &idxWriter() override; IAttributeFileWriter &weightWriter() override; IAttributeFileWriter &udatWriter() override; + + bool setup_writer(const vespalib::string& file_suffix, + const vespalib::string& desc) override; + IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) override; + }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h index 9f90544bb83..8946fc2fcdb 100644 --- a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h +++ b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h @@ -37,6 +37,19 @@ public: virtual IAttributeFileWriter &weightWriter() = 0; virtual IAttributeFileWriter &udatWriter() = 0; + /** + * Setups a custom file writer with the given file suffix and description in the file header. + * Returns false if the file writer cannot be setup or if it already exists, true otherwise. + */ + virtual bool setup_writer(const vespalib::string& file_suffix, + const vespalib::string& desc) = 0; + + /** + * Returns the file writer with the given file suffix. + * Throws vespalib::IllegalArgumentException if the file writer does not exists. 
+ */ + virtual IAttributeFileWriter& get_writer(const vespalib::string& file_suffix) = 0; + virtual ~IAttributeSaveTarget(); }; |