summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--searchcorespi/src/vespa/searchcorespi/flush/iflushtarget.h2
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/attribute/attribute_header/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp77
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_header.cpp76
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_header.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.cpp26
7 files changed, 176 insertions, 21 deletions
diff --git a/searchcorespi/src/vespa/searchcorespi/flush/iflushtarget.h b/searchcorespi/src/vespa/searchcorespi/flush/iflushtarget.h
index 31707643649..03d9ba8d55c 100644
--- a/searchcorespi/src/vespa/searchcorespi/flush/iflushtarget.h
+++ b/searchcorespi/src/vespa/searchcorespi/flush/iflushtarget.h
@@ -153,7 +153,7 @@ public:
virtual Time getLastFlushTime() const = 0;
/**
- * Return if the traget itself is in bad need for a flush.
+ * Return if the target itself is in bad need for a flush.
*
* @return true if an urgent flush is needed
*/
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 4237bede9d5..19cad2f3905 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -68,6 +68,7 @@ vespa_define_module(
src/tests/aggregator
src/tests/alignment
src/tests/attribute
+ src/tests/attribute/attribute_header
src/tests/attribute/attribute_operation
src/tests/attribute/attributefilewriter
src/tests/attribute/attributemanager
diff --git a/searchlib/src/tests/attribute/attribute_header/CMakeLists.txt b/searchlib/src/tests/attribute/attribute_header/CMakeLists.txt
new file mode 100644
index 00000000000..e72c0c6a528
--- /dev/null
+++ b/searchlib/src/tests/attribute/attribute_header/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Builds attribute_header_test.cpp into a test executable linked against searchlib and gtest, then registers it as a test.
+vespa_add_executable(searchlib_attribute_header_test_app TEST
+ SOURCES
+ attribute_header_test.cpp
+ DEPENDS
+ searchlib
+ gtest
+)
+vespa_add_test(NAME searchlib_attribute_header_test_app COMMAND searchlib_attribute_header_test_app)
diff --git a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
new file mode 100644
index 00000000000..0f542d016a9
--- /dev/null
+++ b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
@@ -0,0 +1,77 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/attribute/attribute_header.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_header_test");
+
+using namespace search;
+using namespace search::attribute;
+
+using HnswIPO = std::optional<HnswIndexParams>;
+using vespalib::eval::ValueType;
+
+const Config tensor_cfg(BasicType::TENSOR, CollectionType::SINGLE); // supplies the basic type and collection type given to the AttributeHeader in populate_header()
+const vespalib::string file_name = "my_file_name"; // file name argument for the AttributeHeader; not compared after the roundtrip
+const ValueType tensor_type = ValueType::from_spec("tensor<float>(x[4])"); // compared against getTensorType() after the roundtrip
+constexpr uint32_t num_docs = 23; // compared against getNumDocs() after the roundtrip
+constexpr uint64_t unique_value_count = 11; // passed to the constructor; not compared by verify_roundtrip_serialization()
+constexpr uint64_t total_value_count = 13; // passed to the constructor; not compared by verify_roundtrip_serialization()
+constexpr uint64_t create_serial_num = 17; // compared against getCreateSerialNum() after the roundtrip
+constexpr uint32_t version = 19; // compared against getVersion() after the roundtrip
+
+/**
+ * Creates an AttributeHeader from the fixture constants in this file and the
+ * given (optional) HNSW index params, and returns a generic header holding
+ * the tags written by AttributeHeader::addTags().
+ */
+vespalib::GenericHeader
+populate_header(const HnswIPO& hnsw_params)
+{
+    vespalib::GenericHeader generic_header;
+    const AttributeHeader attr_header(file_name,
+                                      tensor_cfg.basicType(),
+                                      tensor_cfg.collectionType(),
+                                      tensor_type,
+                                      false, // enumerated
+                                      PersistentPredicateParams(),
+                                      hnsw_params,
+                                      num_docs,
+                                      unique_value_count,
+                                      total_value_count,
+                                      create_serial_num,
+                                      version);
+    attr_header.addTags(generic_header);
+    return generic_header;
+}
+
+/**
+ * Writes an attribute header carrying the given (optional) HNSW index params
+ * into a generic header, extracts an AttributeHeader from it again, and checks
+ * that the extracted fields match what was written.
+ */
+void
+verify_roundtrip_serialization(const HnswIPO& hnsw_params_in)
+{
+    auto gen_header = populate_header(hnsw_params_in);
+    auto attr_header = AttributeHeader::extractTags(gen_header);
+
+    EXPECT_EQ(tensor_cfg.basicType(), attr_header.getBasicType());
+    EXPECT_EQ(tensor_cfg.collectionType(), attr_header.getCollectionType());
+    EXPECT_EQ(tensor_type, attr_header.getTensorType());
+    EXPECT_EQ(num_docs, attr_header.getNumDocs());
+    EXPECT_EQ(create_serial_num, attr_header.getCreateSerialNum());
+    EXPECT_EQ(version, attr_header.getVersion());
+    EXPECT_FALSE(attr_header.getPredicateParamsSet());
+    const auto& hnsw_params_out = attr_header.get_hnsw_index_params();
+    // Fatal assertion: if presence differs, stop here instead of calling
+    // value() on an empty optional below (which would throw
+    // std::bad_optional_access and obscure the real failure).
+    ASSERT_EQ(hnsw_params_in.has_value(), hnsw_params_out.has_value());
+    if (hnsw_params_in.has_value()) {
+        EXPECT_EQ(hnsw_params_in.value(), hnsw_params_out.value());
+    }
+}
+
+// Round-trips a header once per distance metric listed below, and once
+// without any HNSW index params at all.
+TEST(AttributeHeaderTest, can_be_added_to_and_extracted_from_generic_header)
+{
+    for (auto metric : {DistanceMetric::Euclidean, DistanceMetric::Angular, DistanceMetric::GeoDegrees}) {
+        verify_roundtrip_serialization(HnswIPO({16, 100, metric}));
+    }
+    verify_roundtrip_serialization(HnswIPO());
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
index 3d7010ba6c3..b35a88fab77 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
@@ -3,6 +3,7 @@
#include "attribute_header.h"
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/util/exceptions.h>
namespace search::attribute {
@@ -18,6 +19,13 @@ const vespalib::string tensorTypeTag = "tensortype";
const vespalib::string predicateArityTag = "predicate.arity";
const vespalib::string predicateLowerBoundTag = "predicate.lower_bound";
const vespalib::string predicateUpperBoundTag = "predicate.upper_bound";
+const vespalib::string hnsw_max_links_tag = "hnsw.max_links_per_node";
+const vespalib::string hnsw_neighbors_to_explore_tag = "hnsw.neighbors_to_explore_at_insert";
+const vespalib::string hnsw_distance_metric = "hnsw.distance_metric";
+const vespalib::string euclidean = "euclidean";
+const vespalib::string angular = "angular";
+const vespalib::string geodegrees = "geodegrees";
+const vespalib::string doc_id_limit_tag = "docIdLimit";
}
@@ -35,6 +43,7 @@ AttributeHeader::AttributeHeader(const vespalib::string &fileName)
_collectionTypeParamsSet(false),
_predicateParamsSet(false),
_predicateParams(),
+ _hnsw_index_params(),
_numDocs(0),
_uniqueValueCount(0),
_totalValueCount(0),
@@ -43,11 +52,18 @@ AttributeHeader::AttributeHeader(const vespalib::string &fileName)
{
}
-AttributeHeader::AttributeHeader(const vespalib::string &fileName, attribute::BasicType basicType,
- attribute::CollectionType collectionType, const vespalib::eval::ValueType &tensorType,
- bool enumerated, const attribute::PersistentPredicateParams &predicateParams,
- uint32_t numDocs, [[maybe_unused]] uint32_t fixedWidth, uint64_t uniqueValueCount,
- uint64_t totalValueCount, uint64_t createSerialNum, uint32_t version)
+AttributeHeader::AttributeHeader(const vespalib::string &fileName,
+ attribute::BasicType basicType,
+ attribute::CollectionType collectionType,
+ const vespalib::eval::ValueType &tensorType,
+ bool enumerated,
+ const attribute::PersistentPredicateParams &predicateParams,
+ const std::optional<HnswIndexParams>& hnsw_index_params,
+ uint32_t numDocs,
+ uint64_t uniqueValueCount,
+ uint64_t totalValueCount,
+ uint64_t createSerialNum,
+ uint32_t version)
: _fileName(fileName),
_basicType(basicType),
_collectionType(collectionType),
@@ -56,6 +72,7 @@ AttributeHeader::AttributeHeader(const vespalib::string &fileName, attribute::Ba
_collectionTypeParamsSet(false),
_predicateParamsSet(false),
_predicateParams(predicateParams),
+ _hnsw_index_params(hnsw_index_params),
_numDocs(numDocs),
_uniqueValueCount(uniqueValueCount),
_totalValueCount(totalValueCount),
@@ -66,6 +83,35 @@ AttributeHeader::AttributeHeader(const vespalib::string &fileName, attribute::Ba
AttributeHeader::~AttributeHeader() = default;
+namespace {
+
+/**
+ * Returns the textual name of the given distance metric, as written to the
+ * hnsw distance metric header tag.
+ *
+ * Throws vespalib::IllegalArgumentException if the value matches none of the
+ * enumerators handled below.
+ */
+vespalib::string
+to_string(DistanceMetric metric)
+{
+    const vespalib::string* name = nullptr;
+    switch (metric) {
+    case DistanceMetric::Euclidean:
+        name = &euclidean;
+        break;
+    case DistanceMetric::Angular:
+        name = &angular;
+        break;
+    case DistanceMetric::GeoDegrees:
+        name = &geodegrees;
+        break;
+    }
+    if (name == nullptr) {
+        throw vespalib::IllegalArgumentException("Unknown distance metric " + std::to_string(static_cast<int>(metric)));
+    }
+    return *name;
+}
+
+/**
+ * Returns the distance metric whose textual name equals the given string.
+ *
+ * Throws vespalib::IllegalStateException if the string matches none of the
+ * known names.
+ */
+DistanceMetric
+to_distance_metric(const vespalib::string& metric)
+{
+    if (metric == euclidean) {
+        return DistanceMetric::Euclidean;
+    }
+    if (metric == angular) {
+        return DistanceMetric::Angular;
+    }
+    if (metric == geodegrees) {
+        return DistanceMetric::GeoDegrees;
+    }
+    throw vespalib::IllegalStateException("Unknown distance metric '" + metric + "'");
+}
+
+}
+
void
AttributeHeader::internalExtractTags(const vespalib::GenericHeader &header)
{
@@ -91,6 +137,15 @@ AttributeHeader::internalExtractTags(const vespalib::GenericHeader &header)
if (_basicType.type() == BasicType::Type::TENSOR) {
assert(header.hasTag(tensorTypeTag));
_tensorType = vespalib::eval::ValueType::from_spec(header.getTag(tensorTypeTag).asString());
+ if (header.hasTag(hnsw_max_links_tag)) {
+ assert(header.hasTag(hnsw_neighbors_to_explore_tag));
+ assert(header.hasTag(hnsw_distance_metric));
+
+ uint32_t max_links = header.getTag(hnsw_max_links_tag).asInteger();
+ uint32_t neighbors_to_explore = header.getTag(hnsw_neighbors_to_explore_tag).asInteger();
+ DistanceMetric distance_metric = to_distance_metric(header.getTag(hnsw_distance_metric).asString());
+ _hnsw_index_params.emplace(max_links, neighbors_to_explore, distance_metric);
+ }
}
if (_basicType.type() == BasicType::Type::PREDICATE) {
if (header.hasTag(predicateArityTag)) {
@@ -105,6 +160,9 @@ AttributeHeader::internalExtractTags(const vespalib::GenericHeader &header)
assert(!header.hasTag(predicateUpperBoundTag));
}
}
+ if (header.hasTag(doc_id_limit_tag)) {
+ _numDocs = header.getTag(doc_id_limit_tag).asInteger();
+ }
if (header.hasTag(versionTag)) {
_version = header.getTag(versionTag).asInteger();
}
@@ -130,7 +188,7 @@ AttributeHeader::addTags(vespalib::GenericHeader &header) const
}
header.putTag(Tag("uniqueValueCount", _uniqueValueCount));
header.putTag(Tag("totalValueCount", _totalValueCount));
- header.putTag(Tag("docIdLimit", _numDocs));
+ header.putTag(Tag(doc_id_limit_tag, _numDocs));
header.putTag(Tag("frozen", 0));
header.putTag(Tag("fileBitSize", 0));
header.putTag(Tag(versionTag, _version));
@@ -142,6 +200,12 @@ AttributeHeader::addTags(vespalib::GenericHeader &header) const
}
if (_basicType.type() == attribute::BasicType::Type::TENSOR) {
header.putTag(Tag(tensorTypeTag, _tensorType.to_spec()));;
+ if (_hnsw_index_params.has_value()) {
+ const auto& params = *_hnsw_index_params;
+ header.putTag(Tag(hnsw_max_links_tag, params.max_links_per_node()));
+ header.putTag(Tag(hnsw_neighbors_to_explore_tag, params.neighbors_to_explore_at_insert()));
+ header.putTag(Tag(hnsw_distance_metric, to_string(params.distance_metric())));
+ }
}
if (_basicType.type() == attribute::BasicType::Type::PREDICATE) {
const auto & params = _predicateParams;
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.h b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
index 24eac8336b4..583253eea0f 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
@@ -5,8 +5,10 @@
#include <vespa/vespalib/stllike/string.h>
#include <vespa/searchcommon/attribute/basictype.h>
#include <vespa/searchcommon/attribute/collectiontype.h>
+#include <vespa/searchcommon/attribute/hnsw_index_params.h>
#include <vespa/searchcommon/attribute/predicate_params.h>
#include <vespa/eval/eval/value_type.h>
+#include <optional>
namespace vespalib { class GenericHeader; }
@@ -26,6 +28,7 @@ private:
bool _collectionTypeParamsSet;
bool _predicateParamsSet;
PersistentPredicateParams _predicateParams;
+ std::optional<HnswIndexParams> _hnsw_index_params;
uint32_t _numDocs;
uint64_t _uniqueValueCount;
uint64_t _totalValueCount;
@@ -42,8 +45,8 @@ public:
const vespalib::eval::ValueType &tensorType,
bool enumerated,
const PersistentPredicateParams &predicateParams,
+ const std::optional<HnswIndexParams>& hnsw_index_params,
uint32_t numDocs,
- uint32_t fixedWidth,
uint64_t uniqueValueCount,
uint64_t totalValueCount,
uint64_t createSerialNum,
@@ -63,6 +66,7 @@ public:
const PersistentPredicateParams &getPredicateParams() const { return _predicateParams; }
bool getPredicateParamsSet() const { return _predicateParamsSet; }
bool getCollectionTypeParamsSet() const { return _collectionTypeParamsSet; }
+ const std::optional<HnswIndexParams>& get_hnsw_index_params() const { return _hnsw_index_params; }
static AttributeHeader extractTags(const vespalib::GenericHeader &header);
void addTags(vespalib::GenericHeader &header) const;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index ffc62d806e2..1f002ce612c 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -306,19 +306,19 @@ AttributeVector::save(IAttributeSaveTarget &saveTarget, vespalib::stringref file
attribute::AttributeHeader
AttributeVector::createAttributeHeader(vespalib::stringref fileName) const {
return attribute::AttributeHeader(fileName,
- getConfig().basicType(),
- getConfig().collectionType(),
- getConfig().basicType().type() == BasicType::Type::TENSOR
- ? getConfig().tensorType()
- : vespalib::eval::ValueType::error_type(),
- getEnumeratedSave(),
- getConfig().predicateParams(),
- getCommittedDocIdLimit(),
- getFixedWidth(),
- getUniqueValueCount(),
- getTotalValueCount(),
- getCreateSerialNum(),
- getVersion());
+ getConfig().basicType(),
+ getConfig().collectionType(),
+ (getConfig().basicType().type() == BasicType::Type::TENSOR
+ ? getConfig().tensorType()
+ : vespalib::eval::ValueType::error_type()),
+ getEnumeratedSave(),
+ getConfig().predicateParams(),
+ getConfig().hnsw_index_params(),
+ getCommittedDocIdLimit(),
+ getUniqueValueCount(),
+ getTotalValueCount(),
+ getCreateSerialNum(),
+ getVersion());
}
void AttributeVector::onSave(IAttributeSaveTarget &)