aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/queryeval/iterator_benchmark/disk_index_builder.cpp
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2024-03-01 17:02:07 +0000
committerGeir Storli <geirst@yahooinc.com>2024-03-01 17:09:27 +0000
commit04a37cd013af81b9380d543ed67d609864f22716 (patch)
tree491c45aa82168c0677c4eff165486ed198b3d671 /searchlib/src/tests/queryeval/iterator_benchmark/disk_index_builder.cpp
parent01df9824d10c6a353bc7d811c883986c4682b248 (diff)
Add initial support to benchmark over a disk index field.
Diffstat (limited to 'searchlib/src/tests/queryeval/iterator_benchmark/disk_index_builder.cpp')
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/disk_index_builder.cpp88
1 files changed, 88 insertions, 0 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/disk_index_builder.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/disk_index_builder.cpp
new file mode 100644
index 00000000000..96fcc261837
--- /dev/null
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/disk_index_builder.cpp
@@ -0,0 +1,88 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "disk_index_builder.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/diskindex/diskindex.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <filesystem>
+
+using search::diskindex::DiskIndex;
+using search::index::DocIdAndPosOccFeatures;
+using search::index::Schema;
+
+namespace search::queryeval::test {
+
+constexpr search::queryeval::Source default_source = 0; // Source id given to the selector and assigned to every docid by the DiskIndexBuilder constructor.
+
+// Creates the underlying index builder for index_dir and starts field 0 on it.
+// A source selector is constructed from default_source, "<index_dir>/selector" and docid_limit.
+DiskIndexBuilder::DiskIndexBuilder(const Schema& schema, vespalib::stringref index_dir, uint32_t docid_limit, uint64_t num_words)
+    : _schema(schema),
+      _field_length_inspector(),
+      _tune_file_indexing(), _tune_file_attributes(), _tune_file_search(),
+      _file_header_ctx(),
+      _index_dir(index_dir),
+      _selector(default_source, _index_dir + "/selector", docid_limit),
+      _builder(_schema, index_dir, docid_limit, num_words, _field_length_inspector, _tune_file_indexing, _file_header_ctx),
+      _field_builder(_builder.startField(0))
+{
+    // Every docid below docid_limit is served by this disk index, so map each one to default_source.
+    for (uint32_t lid = 0; lid < docid_limit; ++lid) {
+        _selector.setSource(lid, default_source);
+    }
+}
+
+// Writes one word to the current field: each docid set in docids is added with identical features,
+// holding num_occs occurrences (word positions 0..num_occs-1) and a field length of num_occs * 10.
+void
+DiskIndexBuilder::add_word(vespalib::stringref word, search::BitVector& docids, uint32_t num_occs)
+{
+    const uint32_t field_length = num_occs * 10;
+    DocIdAndPosOccFeatures features;
+    features.word_positions().reserve(num_occs);
+    for (uint32_t pos = 0; pos < num_occs; ++pos) {
+        features.addNextOcc(0, pos, 1, field_length);
+    }
+    features.set_field_length(field_length);
+    features.set_num_occs(num_occs);
+    _field_builder->startWord(word);
+    docids.foreach_truebit([&](uint32_t docid) { features.set_doc_id(docid); _field_builder->add_document(features); });
+    _field_builder->endWord();
+}
+
+namespace {
+
+// Benchmark searchable backed by a DiskIndex; deletes the index directory from disk when destroyed.
+class DiskIndexSearchable : public BenchmarkSearchable {
+private:
+    std::unique_ptr<DiskIndex> _index;
+public:
+    explicit DiskIndexSearchable(std::unique_ptr<DiskIndex> index) : _index(std::move(index)) {}
+    ~DiskIndexSearchable() {
+        vespalib::string index_dir = _index->getIndexDir();
+        _index.reset(); // Destroy the index before removing its directory.
+        std::error_code ec; // Best-effort cleanup: a removal failure must not throw out of a destructor (std::terminate).
+        std::filesystem::remove_all(std::filesystem::path(index_dir), ec);
+    }
+    std::unique_ptr<Blueprint> create_blueprint(const FieldSpec& field, const search::query::Node& term) override {
+        FakeRequestContext req_ctx;
+        return _index->createBlueprint(req_ctx, field, term);
+    }
+};
+
+}
+
+std::unique_ptr<BenchmarkSearchable>
+DiskIndexBuilder::build() // Releases the field builder, saves the selector and opens _index_dir as a DiskIndex-backed searchable.
+{
+    _field_builder.reset(); // add_word() dereferences this pointer, so it must not be called after build().
+    _selector.extractSaveInfo(_index_dir + "/selector")->save(_tune_file_attributes, _file_header_ctx);
+    auto index = std::make_unique<DiskIndex>(_index_dir);
+    [[maybe_unused]] const bool setup_ok = index->setup(_tune_file_search); // Only read by assert(): unused (and unchecked) with NDEBUG.
+    assert(setup_ok);
+    return std::make_unique<DiskIndexSearchable>(std::move(index));
+}
+
+}