aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--searchcore/src/tests/proton/index/diskindexcleaner_test.cpp32
-rw-r--r--searchcorespi/CMakeLists.txt2
-rw-r--r--searchcorespi/src/tests/index/active_disk_indexes/CMakeLists.txt9
-rw-r--r--searchcorespi/src/tests/index/active_disk_indexes/active_disk_indexes_test.cpp53
-rw-r--r--searchcorespi/src/tests/index/disk_indexes/CMakeLists.txt9
-rw-r--r--searchcorespi/src/tests/index/disk_indexes/disk_indexes_test.cpp196
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt4
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/activediskindexes.cpp47
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/activediskindexes.h34
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/disk_indexes.cpp131
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/disk_indexes.h46
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp18
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h6
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h2
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.h25
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.cpp (renamed from searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.cpp)4
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.h30
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp79
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h4
19 files changed, 514 insertions, 217 deletions
diff --git a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp
index 6eab2c5bf3d..c814bdf3f37 100644
--- a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp
+++ b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp
@@ -1,7 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// Unit tests for diskindexcleaner.
-#include <vespa/searchcorespi/index/activediskindexes.h>
+#include <vespa/searchcorespi/index/disk_indexes.h>
#include <vespa/searchcorespi/index/diskindexcleaner.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/fastos/file.h>
@@ -90,8 +90,8 @@ void createIndexes() {
void Test::requireThatAllIndexesOlderThanLastFusionIsRemoved() {
createIndexes();
- ActiveDiskIndexes active_indexes;
- DiskIndexCleaner::clean(index_dir, active_indexes);
+ DiskIndexes disk_indexes;
+ DiskIndexCleaner::clean(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
EXPECT_EQUAL(3u, indexes.size());
EXPECT_TRUE(contains(indexes, "index.fusion.2"));
@@ -101,17 +101,17 @@ void Test::requireThatAllIndexesOlderThanLastFusionIsRemoved() {
void Test::requireThatIndexesInUseAreNotRemoved() {
createIndexes();
- ActiveDiskIndexes active_indexes;
- active_indexes.setActive(index_dir + "/index.fusion.1");
- active_indexes.setActive(index_dir + "/index.flush.2");
- DiskIndexCleaner::clean(index_dir, active_indexes);
+ DiskIndexes disk_indexes;
+ disk_indexes.setActive(index_dir + "/index.fusion.1", 0);
+ disk_indexes.setActive(index_dir + "/index.flush.2", 0);
+ DiskIndexCleaner::clean(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
EXPECT_TRUE(contains(indexes, "index.fusion.1"));
EXPECT_TRUE(contains(indexes, "index.flush.2"));
- active_indexes.notActive(index_dir + "/index.fusion.1");
- active_indexes.notActive(index_dir + "/index.flush.2");
- DiskIndexCleaner::clean(index_dir, active_indexes);
+ disk_indexes.notActive(index_dir + "/index.fusion.1");
+ disk_indexes.notActive(index_dir + "/index.flush.2");
+ DiskIndexCleaner::clean(index_dir, disk_indexes);
indexes = readIndexes();
EXPECT_TRUE(!contains(indexes, "index.fusion.1"));
EXPECT_TRUE(!contains(indexes, "index.flush.2"));
@@ -120,8 +120,8 @@ void Test::requireThatIndexesInUseAreNotRemoved() {
void Test::requireThatInvalidFlushIndexesAreRemoved() {
createIndexes();
FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete();
- ActiveDiskIndexes active_indexes;
- DiskIndexCleaner::clean(index_dir, active_indexes);
+ DiskIndexes disk_indexes;
+ DiskIndexCleaner::clean(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
EXPECT_EQUAL(2u, indexes.size());
EXPECT_TRUE(contains(indexes, "index.fusion.2"));
@@ -131,8 +131,8 @@ void Test::requireThatInvalidFlushIndexesAreRemoved() {
void Test::requireThatInvalidFusionIndexesAreRemoved() {
createIndexes();
FastOS_File((index_dir + "/index.fusion.2/serial.dat").c_str()).Delete();
- ActiveDiskIndexes active_indexes;
- DiskIndexCleaner::clean(index_dir, active_indexes);
+ DiskIndexes disk_indexes;
+ DiskIndexCleaner::clean(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
EXPECT_EQUAL(4u, indexes.size());
EXPECT_TRUE(contains(indexes, "index.fusion.1"));
@@ -144,8 +144,8 @@ void Test::requireThatInvalidFusionIndexesAreRemoved() {
void Test::requireThatRemoveDontTouchNewIndexes() {
createIndexes();
FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete();
- ActiveDiskIndexes active_indexes;
- DiskIndexCleaner::removeOldIndexes(index_dir, active_indexes);
+ DiskIndexes disk_indexes;
+ DiskIndexCleaner::removeOldIndexes(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
EXPECT_EQUAL(3u, indexes.size());
EXPECT_TRUE(contains(indexes, "index.fusion.2"));
diff --git a/searchcorespi/CMakeLists.txt b/searchcorespi/CMakeLists.txt
index 1029619150a..0bbaa813752 100644
--- a/searchcorespi/CMakeLists.txt
+++ b/searchcorespi/CMakeLists.txt
@@ -20,6 +20,6 @@ vespa_define_module(
src/vespa/searchcorespi/index
TESTS
- src/tests/index/active_disk_indexes
+ src/tests/index/disk_indexes
src/tests/index/index_disk_layout
)
diff --git a/searchcorespi/src/tests/index/active_disk_indexes/CMakeLists.txt b/searchcorespi/src/tests/index/active_disk_indexes/CMakeLists.txt
deleted file mode 100644
index e10ada381bf..00000000000
--- a/searchcorespi/src/tests/index/active_disk_indexes/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchcorespi_active_disk_indexes_test_app
- SOURCES
- active_disk_indexes_test.cpp
- DEPENDS
- searchcorespi
- GTest::GTest
-)
-vespa_add_test(NAME searchcorespi_active_disk_indexes_test_app COMMAND searchcorespi_active_disk_indexes_test_app)
diff --git a/searchcorespi/src/tests/index/active_disk_indexes/active_disk_indexes_test.cpp b/searchcorespi/src/tests/index/active_disk_indexes/active_disk_indexes_test.cpp
deleted file mode 100644
index a6d412817d9..00000000000
--- a/searchcorespi/src/tests/index/active_disk_indexes/active_disk_indexes_test.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/searchcorespi/index/activediskindexes.h>
-#include <vespa/searchcorespi/index/index_disk_dir.h>
-#include <vespa/vespalib/gtest/gtest.h>
-
-namespace searchcorespi::index {
-
-class ActiveDiskIndexesTest : public ::testing::Test,
- public ActiveDiskIndexes
-{
-protected:
- ActiveDiskIndexesTest();
- ~ActiveDiskIndexesTest();
-};
-
-ActiveDiskIndexesTest::ActiveDiskIndexesTest()
- : ::testing::Test(),
- ActiveDiskIndexes()
-{
-}
-
-ActiveDiskIndexesTest::~ActiveDiskIndexesTest() = default;
-
-TEST_F(ActiveDiskIndexesTest, simple_set_active_works)
-{
- EXPECT_FALSE(isActive("index.flush.1"));
- setActive("index.flush.1");
- EXPECT_TRUE(isActive("index.flush.1"));
- notActive("index.flush.1");
- EXPECT_FALSE(isActive("index.flush.1"));
-}
-
-TEST_F(ActiveDiskIndexesTest, nested_set_active_works)
-{
- setActive("index.flush.1");
- setActive("index.flush.1");
- EXPECT_TRUE(isActive("index.flush.1"));
- notActive("index.flush.1");
- EXPECT_TRUE(isActive("index.flush.1"));
- notActive("index.flush.1");
- EXPECT_FALSE(isActive("index.flush.1"));
-}
-
-TEST_F(ActiveDiskIndexesTest, is_active_returns_false_for_bad_name)
-{
- EXPECT_FALSE(isActive("foo/bar/baz"));
- EXPECT_FALSE(isActive("index.flush.0"));
-}
-
-}
-
-GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcorespi/src/tests/index/disk_indexes/CMakeLists.txt b/searchcorespi/src/tests/index/disk_indexes/CMakeLists.txt
new file mode 100644
index 00000000000..81b6bb0e8a9
--- /dev/null
+++ b/searchcorespi/src/tests/index/disk_indexes/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchcorespi_disk_indexes_test_app
+ SOURCES
+ disk_indexes_test.cpp
+ DEPENDS
+ searchcorespi
+ GTest::GTest
+)
+vespa_add_test(NAME searchcorespi_disk_indexes_test_app COMMAND searchcorespi_disk_indexes_test_app)
diff --git a/searchcorespi/src/tests/index/disk_indexes/disk_indexes_test.cpp b/searchcorespi/src/tests/index/disk_indexes/disk_indexes_test.cpp
new file mode 100644
index 00000000000..d22ad499316
--- /dev/null
+++ b/searchcorespi/src/tests/index/disk_indexes/disk_indexes_test.cpp
@@ -0,0 +1,196 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcorespi/index/disk_indexes.h>
+#include <vespa/searchcorespi/index/index_disk_dir.h>
+#include <vespa/searchcorespi/index/indexdisklayout.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/vespalib/util/size_literals.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <fstream>
+
+namespace {
+
+vespalib::string base_dir("base");
+
+constexpr uint32_t block_size = 4_Ki;
+
+}
+
+namespace searchcorespi::index {
+
+class DiskIndexesTest : public ::testing::Test,
+ public DiskIndexes
+{
+ IndexDiskLayout _layout;
+protected:
+ DiskIndexesTest();
+ ~DiskIndexesTest();
+
+ static IndexDiskDir get_index_disk_dir(const vespalib::string& dir) {
+ return IndexDiskLayout::get_index_disk_dir(dir);
+ }
+
+ void assert_transient_size(uint64_t exp, IndexDiskDir index_disk_dir) {
+ EXPECT_EQ(exp, get_transient_size(_layout, index_disk_dir));
+ }
+};
+
+DiskIndexesTest::DiskIndexesTest()
+ : ::testing::Test(),
+ DiskIndexes(),
+ _layout(base_dir)
+{
+}
+
+DiskIndexesTest::~DiskIndexesTest() = default;
+
+TEST_F(DiskIndexesTest, simple_set_active_works)
+{
+ EXPECT_FALSE(isActive("index.flush.1"));
+ setActive("index.flush.1", 0);
+ EXPECT_TRUE(isActive("index.flush.1"));
+ notActive("index.flush.1");
+ EXPECT_FALSE(isActive("index.flush.1"));
+}
+
+TEST_F(DiskIndexesTest, nested_set_active_works)
+{
+ setActive("index.flush.1", 0);
+ setActive("index.flush.1", 0);
+ EXPECT_TRUE(isActive("index.flush.1"));
+ notActive("index.flush.1");
+ EXPECT_TRUE(isActive("index.flush.1"));
+ notActive("index.flush.1");
+ EXPECT_FALSE(isActive("index.flush.1"));
+}
+
+TEST_F(DiskIndexesTest, is_active_returns_false_for_bad_name)
+{
+ EXPECT_FALSE(isActive("foo/bar/baz"));
+ EXPECT_FALSE(isActive("index.flush.0"));
+}
+
+TEST_F(DiskIndexesTest, remove_works)
+{
+ EXPECT_TRUE(remove(IndexDiskDir()));
+ auto fusion1 = get_index_disk_dir("index.fusion.1");
+ EXPECT_TRUE(remove(fusion1));
+ add_not_active(fusion1);
+ EXPECT_TRUE(remove(fusion1));
+ setActive("index.fusion.1", 0);
+ EXPECT_FALSE(remove(fusion1));
+ notActive("index.fusion.1");
+ EXPECT_TRUE(remove(fusion1));
+}
+
+TEST_F(DiskIndexesTest, basic_get_transient_size_works)
+{
+ /*
+ * When starting to use a new fusion index, we have a transient
+ * period with two ISearchableIndexCollection instances:
+ * - old, containing index.fusion.1 and index.flush.2
+ * - new, containing index.fusion.2
+ */
+ setActive("index.fusion.1", 1000000);
+ setActive("index.flush.2", 500000);
+ setActive("index.fusion.2", 1200000);
+ auto fusion1 = get_index_disk_dir("index.fusion.1");
+ auto flush2 = get_index_disk_dir("index.flush.2");
+ auto fusion2 = get_index_disk_dir("index.fusion.2");
+ {
+ /*
+ * When using the old index collection, disk space used by
+ * index.fusion.2 is considered transient.
+ */
+ SCOPED_TRACE("index.fusion.1");
+ assert_transient_size(1200000, fusion1);
+ }
+ {
+ SCOPED_TRACE("index.flush.2");
+ assert_transient_size(0, flush2);
+ }
+ {
+ /*
+ * When using the new index collection, disk space used by
+ * index.fusion.1 and index.flush.2 is considered transient.
+ */
+ SCOPED_TRACE("index.fusion.2");
+ assert_transient_size(1500000, fusion2);
+ }
+ notActive("index.fusion.1");
+ notActive("index.flush.2");
+ {
+ /*
+ * old index collection removed.
+ */
+ SCOPED_TRACE("index.fusion.2 after remove of index.fusion.1 and index.flush.1");
+ assert_transient_size(0, fusion2);
+ }
+}
+
+TEST_F(DiskIndexesTest, get_transient_size_during_ongoing_fusion)
+{
+ /*
+ * During ongoing fusion, we have one ISearchableIndexCollection instance:
+ * - old, containing index.fusion.1 and index.flush.2
+ *
+ * Fusion output directory is index.fusion.2
+ */
+ setActive("index.fusion.1", 1000000);
+ setActive("index.flush.2", 500000);
+ auto fusion1 = get_index_disk_dir("index.fusion.1");
+ auto fusion2 = get_index_disk_dir("index.fusion.2");
+ add_not_active(fusion2); // start tracking disk space for fusion output
+ {
+ /*
+ * Fusion not yet started.
+ */
+ SCOPED_TRACE("dir missing");
+ assert_transient_size(0, fusion1);
+ }
+ auto dir = base_dir + "/index.fusion.2";
+ vespalib::mkdir(dir, true);
+ {
+ /*
+ * Fusion started, but no files written yet.
+ */
+ SCOPED_TRACE("empty dir");
+ assert_transient_size(0, fusion1);
+ }
+ constexpr uint32_t seek_pos = 999999;
+ {
+ std::string name = dir + "/foo";
+ std::ofstream ostr(name, std::ios::binary);
+ ostr.seekp(seek_pos);
+ ostr.write(" ", 1);
+ ostr.flush();
+ ostr.close();
+ }
+ {
+ /*
+ * Fusion started, one file written.
+ */
+ SCOPED_TRACE("single file");
+ assert_transient_size((seek_pos + block_size) / block_size * block_size, fusion1);
+ }
+ EXPECT_TRUE(remove(fusion2)); // stop tracking disk space for fusion output
+ {
+ /*
+ * Fusion aborted.
+ */
+ SCOPED_TRACE("removed");
+ assert_transient_size(0, fusion1);
+ }
+}
+
+}
+
+int
+main(int argc, char* argv[])
+{
+ vespalib::rmdir(base_dir, true);
+ ::testing::InitGoogleTest(&argc, argv);
+ auto result = RUN_ALL_TESTS();
+ vespalib::rmdir(base_dir, true);
+ return result;
+}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt b/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt
index 1987304dc7e..3995eb836fd 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt
+++ b/searchcorespi/src/vespa/searchcorespi/index/CMakeLists.txt
@@ -1,14 +1,14 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(searchcorespi_index OBJECT
SOURCES
- activediskindexes.cpp
diskindexcleaner.cpp
+ disk_indexes.cpp
disk_index_stats.cpp
eventlogger.cpp
fusionrunner.cpp
iindexmanager.cpp
iindexcollection.cpp
- index_disk_dir_active_state.cpp
+ index_disk_dir_state.cpp
index_manager_explorer.cpp
index_manager_stats.cpp
indexcollection.cpp
diff --git a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.cpp b/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.cpp
deleted file mode 100644
index fb9585fb58e..00000000000
--- a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "activediskindexes.h"
-#include "indexdisklayout.h"
-#include "index_disk_dir.h"
-#include "index_disk_dir_active_state.h"
-#include <cassert>
-
-using vespalib::string;
-
-namespace searchcorespi::index {
-
-ActiveDiskIndexes::ActiveDiskIndexes() = default;
-ActiveDiskIndexes::~ActiveDiskIndexes() = default;
-
-void ActiveDiskIndexes::setActive(const string &index) {
- auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index);
- assert(index_disk_dir.valid());
- std::lock_guard lock(_lock);
- auto insres = _active.insert(std::make_pair(index_disk_dir, IndexDiskDirActiveState()));
- insres.first->second.activate();
-}
-
-void ActiveDiskIndexes::notActive(const string & index) {
- auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index);
- assert(index_disk_dir.valid());
- std::lock_guard lock(_lock);
- auto it = _active.find(index_disk_dir);
- assert(it != _active.end());
- assert(it->second.is_active());
- it->second.deactivate();
- if (!it->second.is_active()) {
- _active.erase(it);
- }
-}
-
-bool ActiveDiskIndexes::isActive(const string &index) const {
- auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index);
- if (!index_disk_dir.valid()) {
- return false;
- }
- std::lock_guard lock(_lock);
- auto it = _active.find(index_disk_dir);
- return (it != _active.end()) && it->second.is_active();
-}
-
-}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.h b/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.h
deleted file mode 100644
index 365025e7450..00000000000
--- a/searchcorespi/src/vespa/searchcorespi/index/activediskindexes.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vespalib/stllike/string.h>
-#include <map>
-#include <mutex>
-#include <memory>
-
-namespace searchcorespi::index {
-
-class IndexDiskDir;
-class IndexDiskDirActiveState;
-
-/**
- * Class used to keep track of the set of active disk indexes in an index maintainer.
- * The index directories are used as identifiers.
- */
-class ActiveDiskIndexes {
- std::map<IndexDiskDir, IndexDiskDirActiveState> _active;
- mutable std::mutex _lock;
-
-public:
- using SP = std::shared_ptr<ActiveDiskIndexes>;
- ActiveDiskIndexes();
- ~ActiveDiskIndexes();
- ActiveDiskIndexes(const ActiveDiskIndexes &) = delete;
- ActiveDiskIndexes & operator = (const ActiveDiskIndexes &) = delete;
- void setActive(const vespalib::string & index);
- void notActive(const vespalib::string & index);
- bool isActive(const vespalib::string & index) const;
-};
-
-}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.cpp b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.cpp
new file mode 100644
index 00000000000..28f6a886d06
--- /dev/null
+++ b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.cpp
@@ -0,0 +1,131 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "disk_indexes.h"
+#include "indexdisklayout.h"
+#include "index_disk_dir.h"
+#include "index_disk_dir_state.h"
+#include <vespa/searchlib/util/dirtraverse.h>
+#include <cassert>
+#include <vector>
+
+using vespalib::string;
+
+namespace searchcorespi::index {
+
+DiskIndexes::DiskIndexes() = default;
+DiskIndexes::~DiskIndexes() = default;
+
+void
+DiskIndexes::setActive(const string &index, uint64_t size_on_disk)
+{
+ auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index);
+ assert(index_disk_dir.valid());
+ std::lock_guard lock(_lock);
+ auto insres = _active.insert(std::make_pair(index_disk_dir, IndexDiskDirState()));
+ insres.first->second.activate();
+ if (!insres.first->second.get_size_on_disk().has_value()) {
+ insres.first->second.set_size_on_disk(size_on_disk);
+ }
+}
+
+void DiskIndexes::notActive(const string & index) {
+ auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index);
+ assert(index_disk_dir.valid());
+ std::lock_guard lock(_lock);
+ auto it = _active.find(index_disk_dir);
+ assert(it != _active.end());
+ assert(it->second.is_active());
+ it->second.deactivate();
+ if (!it->second.is_active()) {
+ _active.erase(it);
+ }
+}
+
+bool DiskIndexes::isActive(const string &index) const {
+ auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(index);
+ if (!index_disk_dir.valid()) {
+ return false;
+ }
+ std::lock_guard lock(_lock);
+ auto it = _active.find(index_disk_dir);
+ return (it != _active.end()) && it->second.is_active();
+}
+
+
+void
+DiskIndexes::add_not_active(IndexDiskDir index_disk_dir)
+{
+ std::lock_guard lock(_lock);
+ _active.insert(std::make_pair(index_disk_dir, IndexDiskDirState()));
+}
+
+bool
+DiskIndexes::remove(IndexDiskDir index_disk_dir)
+{
+ if (!index_disk_dir.valid()) {
+ return true;
+ }
+ std::lock_guard lock(_lock);
+ auto it = _active.find(index_disk_dir);
+ if (it == _active.end()) {
+ return true;
+ }
+ if (it->second.is_active()) {
+ return false;
+ }
+ _active.erase(it);
+ return true;
+}
+
+uint64_t
+DiskIndexes::get_transient_size(IndexDiskLayout& layout, IndexDiskDir index_disk_dir) const
+{
+ /*
+ * Only report transient size related to a valid fusion index. This ensures
+ * that transient size is reported once per index collection.
+ */
+ if (!index_disk_dir.valid() || !index_disk_dir.is_fusion_index()) {
+ return 0u;
+ }
+ uint64_t transient_size = 0u;
+ std::vector<IndexDiskDir> deferred;
+ {
+ std::lock_guard lock(_lock);
+ for (auto &entry : _active) {
+ if (entry.first < index_disk_dir) {
+ /*
+ * Indexes before current fusion index are on the way out and
+ * will be removed when all older index collections
+ * referencing them are destroyed. Disk space used by these
+ * indexes is considered transient.
+ */
+ if (entry.second.get_size_on_disk().has_value()) {
+ transient_size += entry.second.get_size_on_disk().value();
+ }
+ }
+ if (index_disk_dir < entry.first && entry.first.is_fusion_index()) {
+ /*
+ * Fusion indexes after current fusion index can be partially
+ * complete and might be removed if fusion is aborted. Disk
+ * space used by these indexes is consider transient.
+ */
+ if (entry.second.get_size_on_disk().has_value()) {
+ transient_size += entry.second.get_size_on_disk().value();
+ } else {
+ deferred.emplace_back(entry.first);
+ }
+ }
+ }
+ }
+ for (auto& entry : deferred) {
+ auto index_dir = layout.getFusionDir(entry.get_id());
+ try {
+ search::DirectoryTraverse dirt(index_dir.c_str());
+ transient_size += dirt.GetTreeSize();
+ } catch (std::exception &) {
+ }
+ }
+ return transient_size;
+}
+
+}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.h b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.h
new file mode 100644
index 00000000000..842c1814faf
--- /dev/null
+++ b/searchcorespi/src/vespa/searchcorespi/index/disk_indexes.h
@@ -0,0 +1,46 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <map>
+#include <mutex>
+#include <memory>
+
+namespace searchcorespi::index {
+
+class IndexDiskDir;
+class IndexDiskDirState;
+class IndexDiskLayout;
+
+/**
+ * Class used to keep track of the set of disk indexes in an index maintainer.
+ * The index directories are used as identifiers.
+ *
+ * DiskIndexCleaner will remove old disk indexes not marked active,
+ * i.e. old disk indexes used by old index collections are not removed.
+ *
+ * At start of fusion, an entry for fusion output index is added, to allow for
+ * tracking of transient disk use while fusion is ongoing. If fusion fails then
+ * the entry is removed, otherwise the entry is marked active as a side effect
+ * of setting up a new index collection.
+ */
+class DiskIndexes {
+ std::map<IndexDiskDir, IndexDiskDirState> _active;
+ mutable std::mutex _lock;
+
+public:
+ using SP = std::shared_ptr<DiskIndexes>;
+ DiskIndexes();
+ ~DiskIndexes();
+ DiskIndexes(const DiskIndexes &) = delete;
+ DiskIndexes & operator = (const DiskIndexes &) = delete;
+ void setActive(const vespalib::string & index, uint64_t size_on_disk);
+ void notActive(const vespalib::string & index);
+ bool isActive(const vespalib::string & index) const;
+ void add_not_active(IndexDiskDir index_disk_dir);
+ bool remove(IndexDiskDir index_disk_dir);
+ uint64_t get_transient_size(IndexDiskLayout& layout, IndexDiskDir index_disk_dir) const;
+};
+
+}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp
index cce880eed3f..3bed7ea8ea7 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp
+++ b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.cpp
@@ -1,7 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "diskindexcleaner.h"
-#include "activediskindexes.h"
+#include "disk_indexes.h"
+#include "indexdisklayout.h"
+#include "index_disk_dir.h"
#include <vespa/fastos/file.h>
#include <vespa/vespalib/io/fileutil.h>
#include <sstream>
@@ -80,13 +82,13 @@ bool isOldIndex(const string &index, uint32_t last_fusion_id) {
}
void removeOld(const string &base_dir, const vector<string> &indexes,
- const ActiveDiskIndexes &active_indexes, bool remove) {
+ DiskIndexes &disk_indexes, bool remove) {
uint32_t last_fusion_id = findLastFusionId(base_dir, indexes);
for (size_t i = 0; i < indexes.size(); ++i) {
const string index_dir = base_dir + "/" + indexes[i];
+ auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(indexes[i]);
if (isOldIndex(indexes[i], last_fusion_id) &&
- !active_indexes.isActive(index_dir))
- {
+ disk_indexes.remove(index_disk_dir)) {
if (remove) {
removeDir(index_dir);
} else {
@@ -108,16 +110,16 @@ void removeInvalid(const string &base_dir, const vector<string> &indexes) {
} // namespace
void DiskIndexCleaner::clean(const string &base_dir,
- const ActiveDiskIndexes &active_indexes) {
+ DiskIndexes &disk_indexes) {
vector<string> indexes = readIndexes(base_dir);
- removeOld(base_dir, indexes, active_indexes, false);
+ removeOld(base_dir, indexes, disk_indexes, false);
removeInvalid(base_dir, indexes);
}
void DiskIndexCleaner::removeOldIndexes(
- const string &base_dir, const ActiveDiskIndexes &active_indexes) {
+ const string &base_dir, DiskIndexes &disk_indexes) {
vector<string> indexes = readIndexes(base_dir);
- removeOld(base_dir, indexes, active_indexes, true);
+ removeOld(base_dir, indexes, disk_indexes, true);
}
}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h
index 798193ab00b..cbd3a5aa94f 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h
+++ b/searchcorespi/src/vespa/searchcorespi/index/diskindexcleaner.h
@@ -6,7 +6,7 @@
namespace searchcorespi {
namespace index {
-class ActiveDiskIndexes;
+class DiskIndexes;
/**
* Utility class used to clean and remove index directories.
@@ -16,9 +16,9 @@ struct DiskIndexCleaner {
* Deletes all indexes with id lower than the most recent fusion id.
*/
static void clean(const vespalib::string &index_dir,
- const ActiveDiskIndexes& active_indexes);
+ DiskIndexes& disk_indexes);
static void removeOldIndexes(const vespalib::string &index_dir,
- const ActiveDiskIndexes& active_indexes);
+ DiskIndexes& disk_indexes);
};
} // namespace index
diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h
index 278fd73e555..335838ddf2e 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h
+++ b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir.h
@@ -29,6 +29,8 @@ public:
return (_id == rhs._id) && (_fusion == rhs._fusion);
}
bool valid() const noexcept { return _id != 0u; }
+ bool is_fusion_index() const noexcept { return _fusion; }
+ uint64_t get_id() const noexcept { return _id; }
};
}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.h b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.h
deleted file mode 100644
index ac84de5adee..00000000000
--- a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <cstdint>
-
-namespace searchcorespi::index {
-
-/*
- * Class describing active state for a disk index directory.
- */
-class IndexDiskDirActiveState {
- uint32_t _active_count;
-public:
- IndexDiskDirActiveState()
- : _active_count(0)
- {
- }
-
- void activate() noexcept { ++_active_count; }
- void deactivate() noexcept;
- bool is_active() const noexcept { return _active_count != 0; }
-};
-
-}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.cpp b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.cpp
index 603971c866e..ffe33d704c8 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_active_state.cpp
+++ b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.cpp
@@ -1,12 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "index_disk_dir_active_state.h"
+#include "index_disk_dir_state.h"
#include <cassert>
namespace searchcorespi::index {
void
-IndexDiskDirActiveState::deactivate() noexcept
+IndexDiskDirState::deactivate() noexcept
{
assert(_active_count > 0u);
--_active_count;
diff --git a/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.h b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.h
new file mode 100644
index 00000000000..d8b790b3960
--- /dev/null
+++ b/searchcorespi/src/vespa/searchcorespi/index/index_disk_dir_state.h
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+
+namespace searchcorespi::index {
+
+/*
+ * Class describing state for a disk index directory.
+ */
+class IndexDiskDirState {
+ uint32_t _active_count;
+ std::optional<uint64_t> _size_on_disk;
+public:
+ IndexDiskDirState()
+ : _active_count(0),
+ _size_on_disk()
+ {
+ }
+
+ void activate() noexcept { ++_active_count; }
+ void deactivate() noexcept;
+ bool is_active() const noexcept { return _active_count != 0; }
+ const std::optional<uint64_t>& get_size_on_disk() const noexcept { return _size_on_disk; }
+ void set_size_on_disk(uint64_t size_on_disk) noexcept { _size_on_disk = size_on_disk; }
+};
+
+}
diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
index 6489cb156ce..afe5b573f21 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
+++ b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
@@ -8,6 +8,7 @@
#include "indexmaintainer.h"
#include "indexreadutilities.h"
#include "indexwriteutilities.h"
+#include "index_disk_dir.h"
#include <vespa/searchcorespi/flush/lambdaflushtask.h>
#include <vespa/searchlib/common/i_flush_token.h>
#include <vespa/searchlib/index/schemautil.h>
@@ -88,13 +89,22 @@ class DiskIndexWithDestructorCallback : public IDiskIndex {
private:
std::shared_ptr<IDestructorCallback> _callback;
IDiskIndex::SP _index;
+ IndexDiskDir _index_disk_dir;
+ IndexDiskLayout& _layout;
+ DiskIndexes& _disk_indexes;
public:
DiskIndexWithDestructorCallback(IDiskIndex::SP index,
- std::shared_ptr<IDestructorCallback> callback) noexcept
+ std::shared_ptr<IDestructorCallback> callback,
+ IndexDiskLayout& layout,
+ DiskIndexes& disk_indexes) noexcept
: _callback(std::move(callback)),
- _index(std::move(index))
- { }
+ _index(std::move(index)),
+ _index_disk_dir(IndexDiskLayout::get_index_disk_dir(_index->getIndexDir())),
+ _layout(layout),
+ _disk_indexes(disk_indexes)
+ {
+ }
~DiskIndexWithDestructorCallback() override;
const IDiskIndex &getWrapped() const { return *_index; }
@@ -117,8 +127,7 @@ public:
{
return _index->createBlueprint(requestContext, fields, term);
}
- // TODO: Calculate the total disk size of current fusion indexes and set fusion_size_on_disk().
- search::SearchableStats getSearchableStats() const override { return _index->getSearchableStats(); }
+ search::SearchableStats getSearchableStats() const override;
search::SerialNum getSerialNum() const override {
return _index->getSerialNum();
}
@@ -143,6 +152,16 @@ public:
DiskIndexWithDestructorCallback::~DiskIndexWithDestructorCallback() = default;
+search::SearchableStats
+DiskIndexWithDestructorCallback::getSearchableStats() const
+{
+ auto stats = _index->getSearchableStats();
+ uint64_t transient_size = _disk_indexes.get_transient_size(_layout, _index_disk_dir);
+ stats.fusion_size_on_disk(transient_size);
+ return stats;
+}
+
+
} // namespace
IndexMaintainer::FusionArgs::FusionArgs()
@@ -272,7 +291,7 @@ IndexMaintainer::updateActiveFusionPrunedSchema(const Schema &schema)
void
IndexMaintainer::deactivateDiskIndexes(vespalib::string indexDir)
{
- _active_indexes->notActive(indexDir);
+ _disk_indexes->notActive(indexDir);
removeOldDiskIndexes();
}
@@ -284,10 +303,13 @@ IndexMaintainer::loadDiskIndex(const string &indexDir)
EventLogger::diskIndexLoadStart(indexDir);
}
vespalib::Timer timer;
- _active_indexes->setActive(indexDir);
+ auto index = _operations.loadDiskIndex(indexDir);
+ auto stats = index->getSearchableStats();
+ _disk_indexes->setActive(indexDir, stats.sizeOnDisk());
auto retval = std::make_shared<DiskIndexWithDestructorCallback>(
- _operations.loadDiskIndex(indexDir),
- makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); }));
+ std::move(index),
+ makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); }),
+ _layout, *_disk_indexes);
if (LOG_WOULD_LOG(event)) {
EventLogger::diskIndexLoadComplete(indexDir, vespalib::count_ms(timer.elapsed()));
}
@@ -303,11 +325,14 @@ IndexMaintainer::reloadDiskIndex(const IDiskIndex &oldIndex)
EventLogger::diskIndexLoadStart(indexDir);
}
vespalib::Timer timer;
- _active_indexes->setActive(indexDir);
const IDiskIndex &wrappedDiskIndex = (dynamic_cast<const DiskIndexWithDestructorCallback &>(oldIndex)).getWrapped();
+ auto index = _operations.reloadDiskIndex(wrappedDiskIndex);
+ auto stats = index->getSearchableStats();
+ _disk_indexes->setActive(indexDir, stats.sizeOnDisk());
auto retval = std::make_shared<DiskIndexWithDestructorCallback>(
- _operations.reloadDiskIndex(wrappedDiskIndex),
- makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); }));
+ std::move(index),
+ makeLambdaCallback([this, indexDir]() { deactivateDiskIndexes(indexDir); }),
+ _layout, *_disk_indexes);
if (LOG_WOULD_LOG(event)) {
EventLogger::diskIndexLoadComplete(indexDir, vespalib::count_ms(timer.elapsed()));
}
@@ -844,7 +869,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config,
IIndexMaintainerOperations &operations)
: _base_dir(config.getBaseDir()),
_warmupConfig(config.getWarmup()),
- _active_indexes(std::make_shared<ActiveDiskIndexes>()),
+ _disk_indexes(std::make_shared<DiskIndexes>()),
_layout(config.getBaseDir()),
_schema(config.getSchema()),
_activeFusionSchema(),
@@ -877,7 +902,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config,
{
// Called by document db init executor thread
_changeGens.bumpPruneGen();
- DiskIndexCleaner::clean(_base_dir, *_active_indexes);
+ DiskIndexCleaner::clean(_base_dir, *_disk_indexes);
FusionSpec spec = IndexReadUtilities::readFusionSpec(_base_dir);
_next_id = 1 + (spec.flush_ids.empty() ? spec.last_fusion_id : spec.flush_ids.back());
_last_fusion_id = spec.last_fusion_id;
@@ -1013,6 +1038,27 @@ IndexMaintainer::doFusion(SerialNum serialNum, std::shared_ptr<search::IFlushTok
return getFusionDir(new_fusion_id);
}
+namespace {
+
+class RemoveFusionIndexGuard {
+ DiskIndexes* _disk_indexes;
+ IndexDiskDir _index_disk_dir;
+public:
+ RemoveFusionIndexGuard(DiskIndexes& disk_indexes, IndexDiskDir index_disk_dir)
+ : _disk_indexes(&disk_indexes),
+ _index_disk_dir(index_disk_dir)
+ {
+ _disk_indexes->add_not_active(index_disk_dir);
+ }
+ ~RemoveFusionIndexGuard() {
+ if (_disk_indexes != nullptr) {
+ (void) _disk_indexes->remove(_index_disk_dir);
+ }
+ }
+ void reset() { _disk_indexes = nullptr; }
+};
+
+}
uint32_t
IndexMaintainer::runFusion(const FusionSpec &fusion_spec, std::shared_ptr<search::IFlushToken> flush_token)
@@ -1034,6 +1080,8 @@ IndexMaintainer::runFusion(const FusionSpec &fusion_spec, std::shared_ptr<search
if (FastOS_File::Stat(lastSerialFile.c_str(), &statInfo)) {
serialNum = IndexReadUtilities::readSerialNum(lastFlushDir);
}
+ IndexDiskDir fusion_index_disk_dir(fusion_spec.flush_ids.back(), true);
+ RemoveFusionIndexGuard remove_fusion_index_guard(*_disk_indexes, fusion_index_disk_dir);
FusionRunner fusion_runner(_base_dir, args._schema, tuneFileAttributes, _ctx.getFileHeaderContext());
uint32_t new_fusion_id = fusion_runner.fuse(fusion_spec, serialNum, _operations, flush_token);
bool ok = (new_fusion_id != 0);
@@ -1066,6 +1114,7 @@ IndexMaintainer::runFusion(const FusionSpec &fusion_spec, std::shared_ptr<search
}
ChangeGens changeGens = getChangeGens();
IDiskIndex::SP new_index(loadDiskIndex(new_fusion_dir));
+ remove_fusion_index_guard.reset();
// Post processing after fusion operation has completed and new disk
// index has been opened.
@@ -1101,7 +1150,7 @@ void
IndexMaintainer::removeOldDiskIndexes()
{
LockGuard slock(_remove_lock);
- DiskIndexCleaner::removeOldIndexes(_base_dir, *_active_indexes);
+ DiskIndexCleaner::removeOldIndexes(_base_dir, *_disk_indexes);
}
IndexMaintainer::FlushStats
diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h
index 8213c02b90c..fa7a9145a3c 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h
+++ b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.h
@@ -2,7 +2,7 @@
#pragma once
#include "iindexmanager.h"
-#include "activediskindexes.h"
+#include "disk_indexes.h"
#include "fusionspec.h"
#include "idiskindex.h"
#include "iindexmaintaineroperations.h"
@@ -76,7 +76,7 @@ class IndexMaintainer : public IIndexManager,
const vespalib::string _base_dir;
const WarmupConfig _warmupConfig;
- ActiveDiskIndexes::SP _active_indexes;
+ DiskIndexes::SP _disk_indexes;
IndexDiskLayout _layout;
Schema _schema; // Protected by SL + IUL
Schema::SP _activeFusionSchema; // Protected by SL + IUL