diff options
19 files changed, 514 insertions, 217 deletions
diff --git a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp index 6eab2c5bf3d..c814bdf3f37 100644 --- a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp +++ b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. // Unit tests for diskindexcleaner. -#include <vespa/searchcorespi/index/activediskindexes.h> +#include <vespa/searchcorespi/index/disk_indexes.h> #include <vespa/searchcorespi/index/diskindexcleaner.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/fastos/file.h> @@ -90,8 +90,8 @@ void createIndexes() { void Test::requireThatAllIndexesOlderThanLastFusionIsRemoved() { createIndexes(); - ActiveDiskIndexes active_indexes; - DiskIndexCleaner::clean(index_dir, active_indexes); + DiskIndexes disk_indexes; + DiskIndexCleaner::clean(index_dir, disk_indexes); vector<string> indexes = readIndexes(); EXPECT_EQUAL(3u, indexes.size()); EXPECT_TRUE(contains(indexes, "index.fusion.2")); @@ -101,17 +101,17 @@ void Test::requireThatAllIndexesOlderThanLastFusionIsRemoved() { void Test::requireThatIndexesInUseAreNotRemoved() { createIndexes(); - ActiveDiskIndexes active_indexes; - active_indexes.setActive(index_dir + "/index.fusion.1"); - active_indexes.setActive(index_dir + "/index.flush.2"); - DiskIndexCleaner::clean(index_dir, active_indexes); + DiskIndexes disk_indexes; + disk_indexes.setActive(index_dir + "/index.fusion.1", 0); + disk_indexes.setActive(index_dir + "/index.flush.2", 0); + DiskIndexCleaner::clean(index_dir, disk_indexes); vector<string> indexes = readIndexes(); EXPECT_TRUE(contains(indexes, "index.fusion.1")); EXPECT_TRUE(contains(indexes, "index.flush.2")); - active_indexes.notActive(index_dir + "/index.fusion.1"); - active_indexes.notActive(index_dir + "/index.flush.2"); - DiskIndexCleaner::clean(index_dir, active_indexes); + disk_indexes.notActive(index_dir + "/index.fusion.1"); + disk_indexes.notActive(index_dir + "/index.flush.2"); + DiskIndexCleaner::clean(index_dir, disk_indexes); indexes = readIndexes(); EXPECT_TRUE(!contains(indexes, "index.fusion.1")); EXPECT_TRUE(!contains(indexes, "index.flush.2")); @@ -120,8 +120,8 @@ void Test::requireThatIndexesInUseAreNotRemoved() { void Test::requireThatInvalidFlushIndexesAreRemoved() { createIndexes(); FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete(); - ActiveDiskIndexes active_indexes; - DiskIndexCleaner::clean(index_dir, active_indexes); + DiskIndexes disk_indexes; + DiskIndexCleaner::clean(index_dir, disk_indexes); vector<string> indexes = readIndexes(); EXPECT_EQUAL(2u, indexes.size()); EXPECT_TRUE(contains(indexes, "index.fusion.2")); @@ -131,8 +131,8 @@ void Test::requireThatInvalidFlushIndexesAreRemoved() { void Test::requireThatInvalidFusionIndexesAreRemoved() { createIndexes(); FastOS_File((index_dir + "/index.fusion.2/serial.dat").c_str()).Delete(); - ActiveDiskIndexes active_indexes; - DiskIndexCleaner::clean(index_dir, active_indexes); + DiskIndexes disk_indexes; + DiskIndexCleaner::clean(index_dir, disk_indexes); vector<string> indexes = readIndexes(); EXPECT_EQUAL(4u, indexes.size()); EXPECT_TRUE(contains(indexes, "index.fusion.1")); @@ -144,8 +144,8 @@ void Test::requireThatInvalidFusionIndexesAreRemoved() { void Test::requireThatRemoveDontTouchNewIndexes() { createIndexes(); FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete(); - ActiveDiskIndexes active_indexes; - DiskIndexCleaner::removeOldIndexes(index_dir, active_indexes); + DiskIndexes disk_indexes; + DiskIndexCleaner::removeOldIndexes(index_dir, disk_indexes); vector<string> indexes = readIndexes(); EXPECT_EQUAL(3u, indexes.size()); EXPECT_TRUE(contains(indexes, "index.fusion.2")); diff --git a/searchcorespi/CMakeLists.txt b/searchcorespi/CMakeLists.txt index 1029619150a..0bbaa813752 100644 --- a/searchcorespi/CMakeLists.txt +++ b/searchcorespi/CMakeLists.txt @@ -20,6 +20,6 @@ vespa_define_module( src/vespa/searchcorespi/index TESTS - src/tests/index/active_disk_indexes + src/tests/index/disk_indexes src/tests/index/index_disk_layout ) diff --git a/searchcorespi/src/tests/index/active_disk_indexes/CMakeLists.txt b/searchcorespi/src/tests/index/active_disk_indexes/CMakeLists.txt deleted file mode 100644 index e10ada381bf..00000000000 --- a/searchcorespi/src/tests/index/active_disk_indexes/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchcorespi_active_disk_indexes_test_app - SOURCES - active_disk_indexes_test.cpp - DEPENDS - searchcorespi - GTest::GTest -) -vespa_add_test(NAME searchcorespi_active_disk_indexes_test_app COMMAND searchcorespi_active_disk_indexes_test_app) diff --git a/searchcorespi/src/tests/index/active_disk_indexes/active_disk_indexes_test.cpp b/searchcorespi/src/tests/index/active_disk_indexes/active_disk_indexes_test.cpp deleted file mode 100644 index a6d412817d9..00000000000 --- a/searchcorespi/src/tests/index/active_disk_indexes/active_disk_indexes_test.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/searchcorespi/index/activediskindexes.h> -#include <vespa/searchcorespi/index/index_disk_dir.h> -#include <vespa/vespalib/gtest/gtest.h> - -namespace searchcorespi::index { - -class ActiveDiskIndexesTest : public ::testing::Test, - public ActiveDiskIndexes -{ -protected: - ActiveDiskIndexesTest(); - ~ActiveDiskIndexesTest(); -}; - -ActiveDiskIndexesTest::ActiveDiskIndexesTest() - : ::testing::Test(), - ActiveDiskIndexes() -{ -} - -ActiveDiskIndexesTest::~ActiveDiskIndexesTest() = default; - -TEST_F(ActiveDiskIndexesTest, simple_set_active_works) -{ - EXPECT_FALSE(isActive("index.flush.1")); - setActive("index.flush.1"); - EXPECT_TRUE(isActive("index.flush.1")); - notActive("index.flush.1"); - EXPECT_FALSE(isActive("index.flush.1")); -} - -TEST_F(ActiveDiskIndexesTest, nested_set_active_works) -{ - setActive("index.flush.1"); - setActive("index.flush.1"); - EXPECT_TRUE(isActive("index.flush.1")); - notActive("index.flush.1"); - EXPECT_TRUE(isActive("index.flush.1")); - notActive("index.flush.1"); - EXPECT_FALSE(isActive("index.flush.1")); -} - -TEST_F(ActiveDiskIndexesTest, is_active_returns_false_for_bad_name) -{ - EXPECT_FALSE(isActive("foo/bar/baz")); - EXPECT_FALSE(isActive("index.flush.0")); -} - -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcorespi/src/tests/index/disk_indexes/CMakeLists.txt b/searchcorespi/src/tests/index/disk_indexes/CMakeLists.txt new file mode 100644 index 00000000000..81b6bb0e8a9 --- /dev/null +++ b/searchcorespi/src/tests/index/disk_indexes/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchcorespi_disk_indexes_test_app + SOURCES + disk_indexes_test.cpp + DEPENDS + searchcorespi + GTest::GTest +) +vespa_add_test(NAME searchcorespi_disk_indexes_test_app COMMAND searchcorespi_disk_indexes_test_app) diff --git a/searchcorespi/src/tests/index/disk_indexes/disk_indexes_test.cpp b/searchcorespi/src/tests/index/disk_indexes/disk_indexes_test.cpp new file mode 100644 index 00000000000..d22ad499316 --- /dev/null +++ b/searchcorespi/src/tests/index/disk_indexes/disk_indexes_test.cpp @@ -0,0 +1,196 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcorespi/index/disk_indexes.h> +#include <vespa/searchcorespi/index/index_disk_dir.h> +#include <vespa/searchcorespi/index/indexdisklayout.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/vespalib/util/size_literals.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <fstream> + +namespace { + +vespalib::string base_dir("base"); + +constexpr uint32_t block_size = 4_Ki; + +} + +namespace searchcorespi::index { + +class DiskIndexesTest : public ::testing::Test, + public DiskIndexes +{ + IndexDiskLayout _layout; +protected: + DiskIndexesTest(); + ~DiskIndexesTest(); + + static IndexDiskDir get_index_disk_dir(const vespalib::string& dir) { + return IndexDiskLayout::get_index_disk_dir(dir); + } + + void assert_transient_size(uint64_t exp, IndexDiskDir index_disk_dir) { + EXPECT_EQ(exp, get_transient_size(_layout, index_disk_dir)); + } +}; + +DiskIndexesTest::DiskIndexesTest() + : ::testing::Test(), + DiskIndexes(), + _layout(base_dir) +{ +} + +DiskIndexesTest::~DiskIndexesTest() = default; + +TEST_F(DiskIndexesTest, simple_set_active_works) +{ + EXPECT_FALSE(isActive("index.flush.1")); + setActive("index.flush.1", 0); + EXPECT_TRUE(isActive("index.flush.1")); + notActive("index.flush.1"); + EXPECT_FALSE(isActive("index.flush.1")); +} + +TEST_F(DiskIndexesTest, nested_set_active_works) +{ + setActive("index.flush.1", 0); + setActive("index.flush.1", 0); + EXPECT_TRUE(isActive("index.flush.1")); + notActive("index.flush.1"); + EXPECT_TRUE(isActive("index.flush.1")); + notActive("index.flush.1"); + EXPECT_FALSE(isActive("index.flush.1")); +} + +TEST_F(DiskIndexesTest, is_active_returns_false_for_bad_name) +{ + EXPECT_FALSE(isActive("foo/bar/baz")); + EXPECT_FALSE(isActive("index.flush.0")); +} + +TEST_F(DiskIndexesTest, remove_works) +{ + EXPECT_TRUE(remove(IndexDiskDir())); + auto fusion1 = get_index_disk_dir("index.fusion.1"); + EXPECT_TRUE(remove(fusion1)); + add_not_active(fusion1); + EXPECT_TRUE(remove(fusion1)); + setActive("index.fusion.1", 0); + EXPECT_FALSE(remove(fusion1)); + notActive("index.fusion.1"); + EXPECT_TRUE(remove(fusion1)); +} + +TEST_F(DiskIndexesTest, basic_get_transient_size_works) +{ + /* + * When starting to use a new fusion index, we have a transient + * period with two ISearchableIndexCollection instances: + * - old, containing index.fusion.1 and index.flush.2 + * - new, containing index.fusion.2 + */ + setActive("index.fusion.1", 1000000); + setActive("index.flush.2", 500000); + setActive("index.fusion.2", 1200000); + auto fusion1 = get_index_disk_dir("index.fusion.1"); + auto flush2 = get_index_disk_dir("index.flush.2"); + auto fusion2 = get_index_disk_dir("index.fusion.2"); + { + /* + * When using the old index collection, disk space used by + * index.fusion.2 is considered transient. + */ + SCOPED_TRACE("index.fusion.1"); + assert_transient_size(1200000, fusion1); + } + { + SCOPED_TRACE("index.flush.2"); + assert_transient_size(0, flush2); + } + { + /* + * When using the new index collection, disk space used by + * index.fusion.1 and index.flush.2 is considered transient. + */ + SCOPED_TRACE("index.fusion.2"); + assert_transient_size(1500000, fusion2); + } + notActive("index.fusion.1"); + notActive("index.flush.2"); + { + /* + * old index collection removed. + */ + SCOPED_TRACE("index.fusion.2 after remove of index.fusion.1 and index.flush.1"); + assert_transient_size(0, fusion2); + } +} + +TEST_F(DiskIndexesTest, get_transient_size_during_ongoing_fusion) +{ + /* + * During ongoing fusion, we have one ISearchableIndexCollection instance: + * - old, containing index.fusion.1 and index.flush.2 + * + * Fusion output directory is index.fusion.2 + */ + setActive("index.fusion.1", 1000000); + setActive("index.flush.2", 500000); + auto fusion1 = get_index_disk_dir("index.fusion.1"); + auto fusion2 = get_index_disk_dir("index.fusion.2"); + add_not_active(fusion2); // start tracking disk space for fusion output + { + /* + * Fusion not yet started. + */ + SCOPED_TRACE("dir missing"); + assert_transient_size(0, fusion1); + } + auto dir = base_dir + "/index.fusion.2"; + vespalib::mkdir(dir, true); + { + /* + * Fusion started, but no files written yet. + */ + SCOPED_TRACE("empty dir"); + assert_transient_size(0, fusion1); + } + constexpr uint32_t seek_pos = 999999; + { + std::string name = dir + "/foo"; + std::ofstream ostr(name, std::ios::binary); + ostr.seekp(seek_pos); + ostr.write(" ", 1); + ostr.flush(); + ostr.close(); + } + { + /* + * Fusion started, one file written. + */ + SCOPED_TRACE("single file"); + assert_transient_size((seek_pos + block_size) / block_size * block_size, fusion1); + } + EXPECT_TRUE(remove(fusion2)); // stop tracking disk space for fusion output + { + /* + * Fusion aborted. + */ + SCOPED_TRACE("removed"); + assert_transient_size(0, fusion1); + } +} + +} + +int +main(int argc, char* argv[]) +{ + vespalib::rmdir(base_dir, true); + ::testing::InitGoogleTest(&argc, argv); + auto result = RUN_ALL_TESTS(); + vespalib::rmdir(base_dir, true); + return result; +} diff --git a/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt b/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt index 1987304dc7e..3995eb836fd 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt +++ b/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt @@ -1,14 +1,14 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchcorespi_index OBJECT SOURCES - activediskindexes.cpp diskindexcleaner.cpp + disk_indexes.cpp disk_index_stats.cpp eventlogger.cpp fusionrunner.cpp iindexmanager.cpp iindexcollection.cpp - index_disk_dir_active_state.cpp + index_disk_dir_state.cpp index_manager_explorer.cpp index_manager_stats.cpp indexcollection.cpp diff --git a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.cpp b/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.cpp deleted file mode 100644 index fb9585fb58e..00000000000 --- a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "activediskindexes.h" -#include "indexdisklayout.h" -#include "index_disk_dir.h" -#include "index_disk_dir_active_state.h" -#include <cassert> - -using vespalib::string; - -namespace searchcorespi::index { - -ActiveDiskIndexes::ActiveDiskIndexes() = default; -ActiveDiskIndexes::~ActiveDiskIndexes() = default; - -void ActiveDiskIndexes::setActive(const string &index) { - auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index); - assert(index_disk_dir.valid()); - std::lock_guard lock(_lock); - auto insres = _active.insert(std::make_pair(index_disk_dir, IndexDiskDirActiveState())); - insres.first->second.activate(); -} - -void ActiveDiskIndexes::notActive(const string & index) { - auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index); - assert(index_disk_dir.valid()); - std::lock_guard lock(_lock); - auto it = _active.find(index_disk_dir); - assert(it != _active.end()); - assert(it->second.is_active()); - it->second.deactivate(); - if (!it->second.is_active()) { - _active.erase(it); - } -} - -bool ActiveDiskIndexes::isActive(const string &index) const { - auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index); - if (!index_disk_dir.valid()) { - return false; - } - std::lock_guard lock(_lock); - auto it = _active.find(index_disk_dir); - return (it != _active.end()) && it->second.is_active(); -} - -} diff --git a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.h b/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.h deleted file mode 100644 index 365025e7450..00000000000 --- a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vespalib/stllike/string.h> -#include <map> -#include <mutex> -#include <memory> - -namespace searchcorespi::index { - -class IndexDiskDir; -class IndexDiskDirActiveState; - -/** - * Class used to keep track of the set of active disk indexes in an index maintainer. - * The index directories are used as identifiers. - */ -class ActiveDiskIndexes { - std::map<IndexDiskDir, IndexDiskDirActiveState> _active; - mutable std::mutex _lock; - -public: - using SP = std::shared_ptr<ActiveDiskIndexes>; - ActiveDiskIndexes(); - ~ActiveDiskIndexes(); - ActiveDiskIndexes(const ActiveDiskIndexes &) = delete; - ActiveDiskIndexes & operator = (const ActiveDiskIndexes &) = delete; - void setActive(const vespalib::string & index); - void notActive(const vespalib::string & index); - bool isActive(const vespalib::string & index) const; -}; - -} diff --git a/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.cpp b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.cpp new file mode 100644 index 00000000000..28f6a886d06 --- /dev/null +++ b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.cpp @@ -0,0 +1,131 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "disk_indexes.h" +#include "indexdisklayout.h" +#include "index_disk_dir.h" +#include "index_disk_dir_state.h" +#include <vespa/searchlib/util/dirtraverse.h> +#include <cassert> +#include <vector> + +using vespalib::string; + +namespace searchcorespi::index { + +DiskIndexes::DiskIndexes() = default; +DiskIndexes::~DiskIndexes() = default; + +void +DiskIndexes::setActive(const string &index, uint64_t size_on_disk) +{ + auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index); + assert(index_disk_dir.valid()); + std::lock_guard lock(_lock); + auto insres = _active.insert(std::make_pair(index_disk_dir, IndexDiskDirState())); + insres.first->second.activate(); + if (!insres.first->second.get_size_on_disk().has_value()) { + insres.first->second.set_size_on_disk(size_on_disk); + } +} + +void DiskIndexes::notActive(const string & index) { + auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index); + assert(index_disk_dir.valid()); + std::lock_guard lock(_lock); + auto it = _active.find(index_disk_dir); + assert(it != _active.end()); + assert(it->second.is_active()); + it->second.deactivate(); + if (!it->second.is_active()) { + _active.erase(it); + } +} + +bool DiskIndexes::isActive(const string &index) const { + auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index); + if (!index_disk_dir.valid()) { + return false; + } + std::lock_guard lock(_lock); + auto it = _active.find(index_disk_dir); + return (it != _active.end()) && it->second.is_active(); +} + + +void +DiskIndexes::add_not_active(IndexDiskDir index_disk_dir) +{ + std::lock_guard lock(_lock); + _active.insert(std::make_pair(index_disk_dir, IndexDiskDirState())); +} + +bool +DiskIndexes::remove(IndexDiskDir index_disk_dir) +{ + if (!index_disk_dir.valid()) { + return true; + } + std::lock_guard lock(_lock); + auto it = _active.find(index_disk_dir); + if (it == _active.end()) { + return true; + } + if (it->second.is_active()) { + return false; + } + _active.erase(it); + return true; +} + +uint64_t +DiskIndexes::get_transient_size(IndexDiskLayout& layout, IndexDiskDir index_disk_dir) const +{ + /* + * Only report transient size related to a valid fusion index. This ensures + * that transient size is reported once per index collection. + */ + if (!index_disk_dir.valid() || !index_disk_dir.is_fusion_index()) { + return 0u; + } + uint64_t transient_size = 0u; + std::vector<IndexDiskDir> deferred; + { + std::lock_guard lock(_lock); + for (auto &entry : _active) { + if (entry.first < index_disk_dir) { + /* + * Indexes before current fusion index are on the way out and + * will be removed when all older index collections + * referencing them are destroyed. Disk space used by these + * indexes is considered transient. + */ + if (entry.second.get_size_on_disk().has_value()) { + transient_size += entry.second.get_size_on_disk().value(); + } + } + if (index_disk_dir < entry.first && entry.first.is_fusion_index()) { + /* + * Fusion indexes after current fusion index can be partially + * complete and might be removed if fusion is aborted. Disk + * space used by these indexes is consider transient. + */ + if (entry.second.get_size_on_disk().has_value()) { + transient_size += entry.second.get_size_on_disk().value(); + } else { + deferred.emplace_back(entry.first); + } + } + } + } + for (auto& entry : deferred) { + auto index_dir = layout.getFusionDir(entry.get_id()); + try { + search::DirectoryTraverse dirt(index_dir.c_str()); + transient_size += dirt.GetTreeSize(); + } catch (std::exception &) { + } + } + return transient_size; +} + +} diff --git a/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.h b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.h new file mode 100644 index 00000000000..842c1814faf --- /dev/null +++ b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.h @@ -0,0 +1,46 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <map> +#include <mutex> +#include <memory> + +namespace searchcorespi::index { + +class IndexDiskDir; +class IndexDiskDirState; +class IndexDiskLayout; + +/** + * Class used to keep track of the set of disk indexes in an index maintainer. + * The index directories are used as identifiers. + * + * DiskIndexCleaner will remove old disk indexes not marked active, + * i.e. old disk indexes used by old index collections are not removed. + * + * At start of fusion, an entry for fusion output index is added, to allow for + * tracking of transient disk use while fusion is ongoing. If fusion fails then + * the entry is removed, otherwise the entry is marked active as a side effect + * of setting up a new index collection. + */ +class DiskIndexes { + std::map<IndexDiskDir, IndexDiskDirState> _active; + mutable std::mutex _lock; + +public: + using SP = std::shared_ptr<DiskIndexes>; + DiskIndexes(); + ~DiskIndexes(); + DiskIndexes(const DiskIndexes &) = delete; + DiskIndexes & operator = (const DiskIndexes &) = delete; + void setActive(const vespalib::string & index, uint64_t size_on_disk); + void notActive(const vespalib::string & index); + bool isActive(const vespalib::string & index) const; + void add_not_active(IndexDiskDir index_disk_dir); + bool remove(IndexDiskDir index_disk_dir); + uint64_t get_transient_size(IndexDiskLayout& layout, IndexDiskDir index_disk_dir) const; +}; + +} diff --git a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp index cce880eed3f..3bed7ea8ea7 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp +++ b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp @@ -1,7 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "diskindexcleaner.h" -#include "activediskindexes.h" +#include "disk_indexes.h" +#include "indexdisklayout.h" +#include "index_disk_dir.h" #include <vespa/fastos/file.h> #include <vespa/vespalib/io/fileutil.h> #include <sstream> @@ -80,13 +82,13 @@ bool isOldIndex(const string &index, uint32_t last_fusion_id) { } void removeOld(const string &base_dir, const vector<string> &indexes, - const ActiveDiskIndexes &active_indexes, bool remove) { + DiskIndexes &disk_indexes, bool remove) { uint32_t last_fusion_id = findLastFusionId(base_dir, indexes); for (size_t i = 0; i < indexes.size(); ++i) { const string index_dir = base_dir + "/" + indexes[i]; + auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(indexes[i]); if (isOldIndex(indexes[i], last_fusion_id) && - !active_indexes.isActive(index_dir)) - { + disk_indexes.remove(index_disk_dir)) { if (remove) { removeDir(index_dir); } else { @@ -108,16 +110,16 @@ void removeInvalid(const string &base_dir, const vector<string> &indexes) { } // namespace void DiskIndexCleaner::clean(const string &base_dir, - const ActiveDiskIndexes &active_indexes) { + DiskIndexes &disk_indexes) { vector<string> indexes = readIndexes(base_dir); - removeOld(base_dir, indexes, active_indexes, false); + removeOld(base_dir, indexes, disk_indexes, false); removeInvalid(base_dir, indexes); } void DiskIndexCleaner::removeOldIndexes( - const string &base_dir, const ActiveDiskIndexes &active_indexes) { + const string &base_dir, DiskIndexes &disk_indexes) { vector<string> indexes = readIndexes(base_dir); - removeOld(base_dir, indexes, active_indexes, true); + removeOld(base_dir, indexes, disk_indexes, true); } } diff --git a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h index 798193ab00b..cbd3a5aa94f 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h +++ b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h @@ -6,7 +6,7 @@ namespace searchcorespi { namespace index { -class ActiveDiskIndexes; +class DiskIndexes; /** * Utility class used to clean and remove index directories. @@ -16,9 +16,9 @@ struct DiskIndexCleaner { * Deletes all indexes with id lower than the most recent fusion id. */ static void clean(const vespalib::string &index_dir, - const ActiveDiskIndexes& active_indexes); + DiskIndexes& disk_indexes); static void removeOldIndexes(const vespalib::string &index_dir, - const ActiveDiskIndexes& active_indexes); + DiskIndexes& disk_indexes); }; } // namespace index diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h index 278fd73e555..335838ddf2e 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h +++ b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h @@ -29,6 +29,8 @@ public: return (_id == rhs._id) && (_fusion == rhs._fusion); } bool valid() const noexcept { return _id != 0u; } + bool is_fusion_index() const noexcept { return _fusion; } + uint64_t get_id() const noexcept { return _id; } }; } diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.h b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.h deleted file mode 100644 index ac84de5adee..00000000000 --- a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <cstdint> - -namespace searchcorespi::index { - -/* - * Class describing active state for a disk index directory. - */ -class IndexDiskDirActiveState { - uint32_t _active_count; -public: - IndexDiskDirActiveState() - : _active_count(0) - { - } - - void activate() noexcept { ++_active_count; } - void deactivate() noexcept; - bool is_active() const noexcept { return _active_count != 0; } -}; - -} diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.cpp b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.cpp index 603971c866e..ffe33d704c8 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.cpp +++ b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.cpp @@ -1,12 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "index_disk_dir_active_state.h" +#include "index_disk_dir_state.h" #include <cassert> namespace searchcorespi::index { void -IndexDiskDirActiveState::deactivate() noexcept +IndexDiskDirState::deactivate() noexcept { assert(_active_count > 0u); --_active_count; diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.h b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.h new file mode 100644 index 00000000000..d8b790b3960 --- /dev/null +++ b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.h @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> +#include <optional> + +namespace searchcorespi::index { + +/* + * Class describing state for a disk index directory. + */ +class IndexDiskDirState { + uint32_t _active_count; + std::optional<uint64_t> _size_on_disk; +public: + IndexDiskDirState() + : _active_count(0), + _size_on_disk() + { + } + + void activate() noexcept { ++_active_count; } + void deactivate() noexcept; + bool is_active() const noexcept { return _active_count != 0; } + const std::optional<uint64_t>& get_size_on_disk() const noexcept { return _size_on_disk; } + void set_size_on_disk(uint64_t size_on_disk) noexcept { _size_on_disk = size_on_disk; } +}; + +} diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp index 6489cb156ce..afe5b573f21 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp +++ b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp @@ -8,6 +8,7 @@ #include "indexmaintainer.h" #include "indexreadutilities.h" #include "indexwriteutilities.h" +#include "index_disk_dir.h" #include <vespa/searchcorespi/flush/lambdaflushtask.h> #include <vespa/searchlib/common/i_flush_token.h> #include <vespa/searchlib/index/schemautil.h> @@ -88,13 +89,22 @@ class DiskIndexWithDestructorCallback : public IDiskIndex { private: std::shared_ptr<IDestructorCallback> _callback; IDiskIndex::SP _index; + IndexDiskDir _index_disk_dir; + IndexDiskLayout& _layout; + DiskIndexes& _disk_indexes; public: DiskIndexWithDestructorCallback(IDiskIndex::SP index, - std::shared_ptr<IDestructorCallback> callback) noexcept + std::shared_ptr<IDestructorCallback> callback, + IndexDiskLayout& layout, + DiskIndexes& disk_indexes) noexcept : _callback(std::move(callback)), - _index(std::move(index)) - { } + _index(std::move(index)), + _index_disk_dir(IndexDiskLayout::get_index_disk_dir(_index->getIndexDir())), + _layout(layout), + _disk_indexes(disk_indexes) + { + } ~DiskIndexWithDestructorCallback() override; const IDiskIndex &getWrapped() const { return *_index; } @@ -117,8 +127,7 @@ public: { return _index->createBlueprint(requestContext, fields, term); } - // TODO: Calculate the total disk size of current fusion indexes and set fusion_size_on_disk(). - search::SearchableStats getSearchableStats() const override { return _index->getSearchableStats(); } + search::SearchableStats getSearchableStats() const override; search::SerialNum getSerialNum() const override { return _index->getSerialNum(); } @@ -143,6 +152,16 @@ public: DiskIndexWithDestructorCallback::~DiskIndexWithDestructorCallback() = default; +search::SearchableStats +DiskIndexWithDestructorCallback::getSearchableStats() const +{ + auto stats = _index->getSearchableStats(); + uint64_t transient_size = _disk_indexes.get_transient_size(_layout, _index_disk_dir); + stats.fusion_size_on_disk(transient_size); + return stats; +} + + } // namespace IndexMaintainer::FusionArgs::FusionArgs() @@ -272,7 +291,7 @@ IndexMaintainer::updateActiveFusionPrunedSchema(const Schema &schema) void IndexMaintainer::deactivateDiskIndexes(vespalib::string indexDir) { - _active_indexes->notActive(indexDir); + _disk_indexes->notActive(indexDir); removeOldDiskIndexes(); } @@ -284,10 +303,13 @@ IndexMaintainer::loadDiskIndex(const string &indexDir) EventLogger::diskIndexLoadStart(indexDir); } vespalib::Timer timer; - _active_indexes->setActive(indexDir); + auto index = _operations.loadDiskIndex(indexDir); + auto stats = index->getSearchableStats(); + _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared<DiskIndexWithDestructorCallback>( - _operations.loadDiskIndex(indexDir), - makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); })); + std::move(index), + makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); }), + _layout, *_disk_indexes); if (LOG_WOULD_LOG(event)) { EventLogger::diskIndexLoadComplete(indexDir, vespalib::count_ms(timer.elapsed())); } @@ -303,11 +325,14 @@ IndexMaintainer::reloadDiskIndex(const IDiskIndex &oldIndex) EventLogger::diskIndexLoadStart(indexDir); } vespalib::Timer timer; - _active_indexes->setActive(indexDir); const IDiskIndex &wrappedDiskIndex = (dynamic_cast<const DiskIndexWithDestructorCallback &>(oldIndex)).getWrapped(); + auto index = _operations.reloadDiskIndex(wrappedDiskIndex); + auto stats = index->getSearchableStats(); + _disk_indexes->setActive(indexDir, stats.sizeOnDisk()); auto retval = std::make_shared<DiskIndexWithDestructorCallback>( - _operations.reloadDiskIndex(wrappedDiskIndex), - makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); })); + std::move(index), + makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); }), + _layout, *_disk_indexes); if (LOG_WOULD_LOG(event)) { EventLogger::diskIndexLoadComplete(indexDir, vespalib::count_ms(timer.elapsed())); } @@ -844,7 +869,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config, IIndexMaintainerOperations &operations) : _base_dir(config.getBaseDir()), _warmupConfig(config.getWarmup()), - _active_indexes(std::make_shared<ActiveDiskIndexes>()), + _disk_indexes(std::make_shared<DiskIndexes>()), _layout(config.getBaseDir()), _schema(config.getSchema()), _activeFusionSchema(), @@ -877,7 +902,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config, { // Called by document db init executor thread _changeGens.bumpPruneGen(); - DiskIndexCleaner::clean(_base_dir, *_active_indexes); + DiskIndexCleaner::clean(_base_dir, *_disk_indexes); FusionSpec spec = IndexReadUtilities::readFusionSpec(_base_dir); _next_id = 1 + (spec.flush_ids.empty() ? spec.last_fusion_id : spec.flush_ids.back()); _last_fusion_id = spec.last_fusion_id; @@ -1013,6 +1038,27 @@ IndexMaintainer::doFusion(SerialNum serialNum, std::shared_ptr<search::IFlushTok return getFusionDir(new_fusion_id); } +namespace { + +class RemoveFusionIndexGuard { + DiskIndexes* _disk_indexes; + IndexDiskDir _index_disk_dir; +public: + RemoveFusionIndexGuard(DiskIndexes& disk_indexes, IndexDiskDir index_disk_dir) + : _disk_indexes(&disk_indexes), + _index_disk_dir(index_disk_dir) + { + _disk_indexes->add_not_active(index_disk_dir); + } + ~RemoveFusionIndexGuard() { + if (_disk_indexes != nullptr) { + (void) _disk_indexes->remove(_index_disk_dir); + } + } + void reset() { _disk_indexes = nullptr; } +}; + +} uint32_t IndexMaintainer::runFusion(const FusionSpec &fusion_spec, std::shared_ptr<search::IFlushToken> flush_token) @@ -1034,6 +1080,8 @@ IndexMaintainer::runFusion(const FusionSpec &fusion_spec, std::shared_ptr<search if (FastOS_File::Stat(lastSerialFile.c_str(), &statInfo)) { serialNum = IndexReadUtilities::readSerialNum(lastFlushDir); } + IndexDiskDir fusion_index_disk_dir(fusion_spec.flush_ids.back(), true); + RemoveFusionIndexGuard remove_fusion_index_guard(*_disk_indexes, fusion_index_disk_dir); FusionRunner fusion_runner(_base_dir, args._schema, tuneFileAttributes, _ctx.getFileHeaderContext()); uint32_t new_fusion_id = fusion_runner.fuse(fusion_spec, serialNum, _operations, flush_token); bool ok = (new_fusion_id != 0); @@ -1066,6 +1114,7 @@ IndexMaintainer::runFusion(const FusionSpec &fusion_spec, std::shared_ptr<search } ChangeGens changeGens = getChangeGens(); IDiskIndex::SP new_index(loadDiskIndex(new_fusion_dir)); + remove_fusion_index_guard.reset(); // Post processing after fusion operation has completed and new disk // index has been opened. @@ -1101,7 +1150,7 @@ void IndexMaintainer::removeOldDiskIndexes() { LockGuard slock(_remove_lock); - DiskIndexCleaner::removeOldIndexes(_base_dir, *_active_indexes); + DiskIndexCleaner::removeOldIndexes(_base_dir, *_disk_indexes); } IndexMaintainer::FlushStats diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h index 8213c02b90c..fa7a9145a3c 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h +++ b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h @@ -2,7 +2,7 @@ #pragma once #include "iindexmanager.h" -#include "activediskindexes.h" +#include "disk_indexes.h" #include "fusionspec.h" #include "idiskindex.h" #include "iindexmaintaineroperations.h" @@ -76,7 +76,7 @@ class IndexMaintainer : public IIndexManager, const vespalib::string _base_dir; const WarmupConfig _warmupConfig; - ActiveDiskIndexes::SP _active_indexes; + DiskIndexes::SP _disk_indexes; IndexDiskLayout _layout; Schema _schema; // Protected by SL + IUL Schema::SP _activeFusionSchema; // Protected by SL + IUL |