summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-07-04 10:39:18 +0200
committerTor Egge <Tor.Egge@online.no>2023-07-04 10:39:18 +0200
commit11776b1e672b4261b5e9701e7dc96c9811031799 (patch)
tree2977440463d7d4bd822763021709502496b441f6 /searchlib
parent4ccf8b0182224a61aa0d71de89ba71e1066ebc9d (diff)
Add sort blob writers.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/attribute/sort_blob_writers/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/sort_blob_writers/sort_blob_writers_test.cpp344
-rw-r--r--searchlib/src/vespa/searchlib/attribute/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.cpp77
-rw-r--r--searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.h25
-rw-r--r--searchlib/src/vespa/searchlib/attribute/sort_blob_writer.h21
-rw-r--r--searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.cpp72
-rw-r--r--searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.h31
9 files changed, 582 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index eb199a7ba44..e885f9bfba7 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -97,6 +97,7 @@ vespa_define_module(
src/tests/attribute/searchable
src/tests/attribute/searchcontext
src/tests/attribute/searchcontextelementiterator
+ src/tests/attribute/sort_blob_writers
src/tests/attribute/sourceselector
src/tests/attribute/stringattribute
src/tests/attribute/tensorattribute
diff --git a/searchlib/src/tests/attribute/sort_blob_writers/CMakeLists.txt b/searchlib/src/tests/attribute/sort_blob_writers/CMakeLists.txt
new file mode 100644
index 00000000000..5aad3c55cb7
--- /dev/null
+++ b/searchlib/src/tests/attribute/sort_blob_writers/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sort_blob_writers_test_app TEST
+ SOURCES
+ sort_blob_writers_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_sort_blob_writers_test_app COMMAND searchlib_sort_blob_writers_test_app)
diff --git a/searchlib/src/tests/attribute/sort_blob_writers/sort_blob_writers_test.cpp b/searchlib/src/tests/attribute/sort_blob_writers/sort_blob_writers_test.cpp
new file mode 100644
index 00000000000..50df88b8c74
--- /dev/null
+++ b/searchlib/src/tests/attribute/sort_blob_writers/sort_blob_writers_test.cpp
@@ -0,0 +1,344 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/attribute/numeric_sort_blob_writer.h>
+#include <vespa/searchlib/attribute/string_sort_blob_writer.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/fastlib/text/normwordfolder.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/util/arrayref.h>
+#include <vespa/vespalib/util/sort.h>
+
+using search::attribute::NumericSortBlobWriter;
+using search::attribute::SortBlobWriter;
+using search::attribute::StringSortBlobWriter;
+using search::common::BlobConverter;
+using search::common::LowercaseConverter;
+using vespalib::ConstArrayRef;
+
+namespace {
+
+using SortData = std::vector<unsigned char>;
+
+SortData missing_value{1};
+
+template <typename T, bool asc>
+SortData
+serialized_present_numeric(T value)
+{
+ SortData s;
+ s.resize(1 + sizeof(T));
+ s[0] = SortBlobWriter::has_value;
+ auto ret = vespalib::serializeForSort<vespalib::convertForSort<T, asc>>(value, s.data() + 1, s.size() - 1);
+ assert(size_t(ret) == s.size() - 1);
+ return s;
+}
+
+SortData
+serialized_present_string(const char *value, bool asc)
+{
+ ConstArrayRef<unsigned char> src(reinterpret_cast<const unsigned char*>(value), strlen(value) + 1);
+ SortData s;
+ s.reserve(src.size() + 1);
+ s.emplace_back(SortBlobWriter::has_value);
+ unsigned char xor_value = asc ? 0 : 255;
+ for (auto c : src) {
+ s.emplace_back(c ^ xor_value);
+ }
+ return s;
+}
+
+template <typename T>
+SortData
+serialized_present(T value, bool asc)
+{
+ if constexpr (std::is_same_v<T, const char*>) {
+ return serialized_present_string(value, asc);
+ } else {
+ if (asc) {
+ return serialized_present_numeric<T, true>(value);
+ }
+ return serialized_present_numeric<T, false>(value);
+ }
+}
+
+template <typename T, bool asc>
+SortData
+sort_data_numeric(std::vector<T> values)
+{
+ size_t len = 0;
+ SortData s;
+ while (true) {
+ s.clear();
+ s.resize(len);
+ NumericSortBlobWriter<T, asc> writer;
+ for (auto& v : values) {
+ writer.candidate(v);
+ }
+ auto result = writer.write(s.data(), s.size());
+ if (result >= 0) {
+ s.resize(result);
+ return s;
+ }
+ ++len;
+ }
+}
+
+SortData
+sort_data_string(std::vector<const char*> values, const BlobConverter* bc, bool asc)
+{
+ size_t len = 0;
+ SortData s;
+ while (true) {
+ s.clear();
+ s.resize(len);
+ StringSortBlobWriter writer(s.data(), s.size(), bc, asc);
+ bool fail = false;
+ for (auto& v : values) {
+ if (!writer.candidate(v)) {
+ fail = true;
+ break;
+ }
+ }
+ if (!fail) {
+ auto result = writer.write();
+ if (result >= 0) {
+ s.resize(result);
+ return s;
+ }
+ }
+ ++len;
+ }
+}
+
+SortData
+sort_data_string(std::vector<const char*> values, bool asc)
+{
+ return sort_data_string(values, nullptr, asc);
+}
+
+template <typename T>
+SortData
+sort_data(std::vector<T> values, bool asc)
+{
+ if constexpr (std::is_same_v<T, const char*>) {
+ return sort_data_string(values, asc);
+ }
+ if (asc) {
+ return sort_data_numeric<T, true>(std::move(values));
+ }
+ return sort_data_numeric<T, false>(std::move(values));
+}
+
+SortData
+switch_sort_order(SortData value)
+{
+ assert(value.size() >= 1);
+ ConstArrayRef<unsigned char> src(value.data() + 1, value.size() - 1);
+ SortData s;
+ s.reserve(src.size() + 1);
+ s.emplace_back(value[0]);
+ for (auto c : src) {
+ s.emplace_back(c ^ 255);
+ }
+ return s;
+}
+
+struct SetupHook {
+ SetupHook();
+};
+
+SetupHook::SetupHook()
+{
+ Fast_NormalizeWordFolder::Setup(Fast_NormalizeWordFolder::DO_ACCENT_REMOVAL |
+ Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION |
+ Fast_NormalizeWordFolder::DO_LIGATURE_SUBSTITUTION |
+ Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION);
+}
+
+SetupHook setup_hook;
+
+}
+
+template <typename Param>
+class SortBlobWritersTest : public ::testing::Test
+{
+protected:
+ SortBlobWritersTest();
+ ~SortBlobWritersTest() override;
+};
+
+
+template <typename Param>
+SortBlobWritersTest<Param>::SortBlobWritersTest()
+ : ::testing::Test()
+{
+}
+
+template <typename Param>
+SortBlobWritersTest<Param>::~SortBlobWritersTest() = default;
+
+struct Int8Params {
+ using Type = int8_t;
+ static constexpr int8_t value = 42;
+ static SortData sort_asc;
+ static SortData sort_desc;
+ static std::vector<int8_t> values;
+ static constexpr int8_t min_value = -4;
+ static constexpr int8_t max_value = 9;
+};
+
+SortData Int8Params::sort_asc{0, 128 ^ 42};
+SortData Int8Params::sort_desc{0, 127 ^ 42};
+std::vector<int8_t> Int8Params::values{5, 7, -4, 9};
+
+struct Int16Params {
+ using Type = int16_t;
+ static constexpr int16_t value = 43;
+ static SortData sort_asc;
+ static SortData sort_desc;
+ static std::vector<int16_t> values;
+ static constexpr int16_t min_value = -4;
+ static constexpr int16_t max_value = 9;
+};
+
+SortData Int16Params::sort_asc{0, 128, 43};
+SortData Int16Params::sort_desc{0, 127, 255 ^ 43};
+std::vector<int16_t> Int16Params::values{5, 7, -4, 9};
+
+struct Int32Params {
+ using Type = int32_t;
+ static constexpr int16_t value = 44;
+ static SortData sort_asc;
+ static SortData sort_desc;
+ static std::vector<int32_t> values;
+ static constexpr int32_t min_value = -4;
+ static constexpr int32_t max_value = 9;
+};
+
+SortData Int32Params::sort_asc{0, 128, 0, 0, 44};
+SortData Int32Params::sort_desc{0, 127, 255, 255, 255 ^ 44};
+std::vector<int32_t> Int32Params::values{5, 7, -4, 9};
+
+struct Int64Params {
+ using Type = int64_t;
+ static constexpr int16_t value = 45;
+ static SortData sort_asc;
+ static SortData sort_desc;
+ static std::vector<int64_t> values;
+ static constexpr int64_t min_value = -4;
+ static constexpr int64_t max_value = 9;
+};
+
+SortData Int64Params::sort_asc{0, 128, 0, 0, 0, 0, 0, 0, 45};
+SortData Int64Params::sort_desc{0, 127, 255, 255, 255, 255, 255, 255, 255 ^ 45};
+std::vector<int64_t> Int64Params::values{5, 7, -4, 9};
+
+struct FloatParams {
+ using Type = float;
+ static constexpr int16_t value = 46;
+ static std::vector<float> values;
+ static std::vector<float> values_with_nan;
+ static std::vector<float> values_only_nan;
+ static constexpr float min_value = -4;
+ static constexpr float max_value = 9;
+};
+
+std::vector<float> FloatParams::values{5, 7, -4, 9};
+std::vector<float> FloatParams::values_with_nan{5, 7, std::numeric_limits<float>::quiet_NaN(), -4, 9};
+std::vector<float> FloatParams::values_only_nan{std::numeric_limits<float>::quiet_NaN()};
+
+struct DoubleParams {
+ using Type = double;
+ static constexpr int16_t value = 47;
+ static std::vector<double> values;
+ static std::vector<double> values_with_nan;
+ static std::vector<double> values_only_nan;
+ static constexpr double min_value = -4;
+ static constexpr double max_value = 9;
+};
+
+std::vector<double> DoubleParams::values{5, 7, -4, 9};
+std::vector<double> DoubleParams::values_with_nan{5, 7, std::numeric_limits<double>::quiet_NaN(), -4, 9};
+std::vector<double> DoubleParams::values_only_nan{std::numeric_limits<double>::quiet_NaN()};
+
+struct StringParams {
+ using Type = const char*;
+ static constexpr const char* value = "Hello";
+ static std::vector<const char*> values;
+ static constexpr const char* min_value = "always";
+ static constexpr const char* max_value = "this";
+};
+
+std::vector<const char*> StringParams::values{"this", "always", "happens"};
+
+using SortBlobWritersTestTypes = testing::Types<Int8Params, Int16Params, Int32Params, Int64Params, FloatParams, DoubleParams, StringParams>;
+
+TYPED_TEST_SUITE(SortBlobWritersTest, SortBlobWritersTestTypes);
+
+TYPED_TEST(SortBlobWritersTest, empty_arrays)
+{
+ using Type = typename TypeParam::Type;
+ EXPECT_EQ(missing_value, sort_data<Type>({}, true));
+ EXPECT_EQ(missing_value, sort_data<Type>({}, false));
+}
+
+TYPED_TEST(SortBlobWritersTest, single_values)
+{
+ using Type = typename TypeParam::Type;
+ auto& value = TypeParam::value;
+ EXPECT_EQ(serialized_present<Type>(value, true), sort_data<Type>({value}, true));
+ EXPECT_EQ(serialized_present<Type>(value, false), sort_data<Type>({value}, false));
+ if constexpr (std::is_integral_v<Type>) {
+ EXPECT_EQ(TypeParam::sort_asc, sort_data<Type>({value}, true));
+ EXPECT_EQ(TypeParam::sort_desc, sort_data<Type>({value}, false));
+ }
+ EXPECT_EQ(switch_sort_order(sort_data<Type>({value}, false)), sort_data<Type>({value}, true));
+ EXPECT_EQ(switch_sort_order(sort_data<Type>({value}, true)), sort_data<Type>({value}, false));
+ EXPECT_GT(missing_value, sort_data<Type>({value}, true));
+ EXPECT_GT(missing_value, sort_data<Type>({value}, false));
+}
+
+TYPED_TEST(SortBlobWritersTest, multiple_values)
+{
+ using Type = typename TypeParam::Type;
+ auto& values = TypeParam::values;
+ EXPECT_EQ(serialized_present<Type>(TypeParam::min_value, true), sort_data<Type>(values, true));
+ EXPECT_EQ(serialized_present<Type>(TypeParam::max_value, false), sort_data<Type>(values, false));
+}
+
+template <typename Param>
+using SortBlobFloatingPointWritersTest = SortBlobWritersTest<Param>;
+
+using SortBlobFloatingPointWritersTestTypes = testing::Types<FloatParams, DoubleParams>;
+
+TYPED_TEST_SUITE(SortBlobFloatingPointWritersTest, SortBlobFloatingPointWritersTestTypes);
+
+TYPED_TEST(SortBlobFloatingPointWritersTest, skip_nan_values)
+{
+ using Type = typename TypeParam::Type;
+ auto& values_only_nan = TypeParam::values_only_nan;
+ auto& values_with_nan = TypeParam::values_with_nan;
+ EXPECT_EQ(missing_value, sort_data<Type>(values_only_nan, true));
+ EXPECT_EQ(missing_value, sort_data<Type>(values_only_nan, false));
+ EXPECT_EQ(serialized_present<Type>(TypeParam::min_value, true), sort_data<Type>(values_with_nan, true));
+ EXPECT_EQ(serialized_present<Type>(TypeParam::max_value, false), sort_data<Type>(values_with_nan, false));
+}
+
+using SortBlobStringWriterTest = SortBlobWritersTest<const char*>;
+
+TEST_F(SortBlobStringWriterTest, blob_converter_is_used)
+{
+ LowercaseConverter lowercase;
+ EXPECT_EQ(serialized_present_string("hello", true), sort_data_string({"Hello"}, &lowercase, true));
+ EXPECT_EQ(serialized_present_string("hello", false), sort_data_string({"Hello"}, &lowercase, false));
+ EXPECT_EQ(serialized_present_string("always", true), sort_data_string({"Hello", "always"}, &lowercase, true));
+ EXPECT_EQ(serialized_present_string("hello", false), sort_data_string({"Hello", "always"}, &lowercase, false));
+}
+
+TEST_F(SortBlobStringWriterTest, prefix_is_first)
+{
+ EXPECT_EQ(serialized_present_string("aaa", true), sort_data_string({"aaa", "aaaa"}, true));
+ EXPECT_EQ(serialized_present_string("aaaa", false), sort_data_string({"aaa", "aaaa"}, false));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 896226f005d..c54169ffd11 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -100,6 +100,7 @@ vespa_add_library(searchlib_attribute OBJECT
numeric_matcher.cpp
numeric_range_matcher.cpp
numeric_search_context.cpp
+ numeric_sort_blob_writer.cpp
posting_list_merger.cpp
postingchange.cpp
postinglistattribute.cpp
@@ -143,6 +144,7 @@ vespa_add_library(searchlib_attribute OBJECT
string_matcher.cpp
string_search_context.cpp
string_search_helper.cpp
+ string_sort_blob_writer.cpp
valuemodifier.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.cpp b/searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.cpp
new file mode 100644
index 00000000000..0c2aacc4ef1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.cpp
@@ -0,0 +1,77 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "numeric_sort_blob_writer.h"
+#include <vespa/vespalib/util/sort.h>
+#include <cmath>
+#include <type_traits>
+
+namespace search::attribute {
+
+template <typename T, bool asc>
+NumericSortBlobWriter<T, asc>::NumericSortBlobWriter() noexcept
+: _best()
+{
+}
+
+template <typename T, bool asc>
+NumericSortBlobWriter<T, asc>::~NumericSortBlobWriter() noexcept = default;
+
+template <typename T, bool asc>
+void
+NumericSortBlobWriter<T, asc>::candidate(T val)
+{
+ if constexpr (std::disjunction_v<std::is_same<T,float>,std::is_same<T, double>>) {
+ if (std::isnan(val)) {
+ return;
+ }
+ }
+ if (_best.has_value()) {
+ if constexpr (asc) {
+ if (_best.value() <= val) {
+ return;
+ }
+ } else {
+ if (_best.value() >= val) {
+ return;
+ }
+ }
+ }
+ _best = val;
+}
+
+template <typename T, bool asc>
+long
+NumericSortBlobWriter<T, asc>::write(void *serTo, size_t available)
+{
+ auto dst = static_cast<unsigned char*>(serTo);
+ if (_best.has_value()) {
+ if (available < 1 + sizeof(T)) {
+ return -1;
+ }
+ *dst = has_value;
+ auto ret = vespalib::serializeForSort<vespalib::convertForSort<T, asc>>(_best.value(), dst + 1, available - 1);
+ return (ret >= 0) ? (ret + 1) : -1;
+ } else {
+ if (available < 1) {
+ return -1;
+ }
+ *dst = missing_value;
+ return 1;
+ }
+}
+
+template class NumericSortBlobWriter<int8_t, true>;
+template class NumericSortBlobWriter<int16_t, true>;
+template class NumericSortBlobWriter<int32_t, true>;
+template class NumericSortBlobWriter<int64_t, true>;
+template class NumericSortBlobWriter<float, true>;
+template class NumericSortBlobWriter<double, true>;
+
+template class NumericSortBlobWriter<int8_t, false>;
+template class NumericSortBlobWriter<int16_t, false>;
+template class NumericSortBlobWriter<int32_t, false>;
+template class NumericSortBlobWriter<int64_t, false>;
+template class NumericSortBlobWriter<float, false>;
+template class NumericSortBlobWriter<double, false>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.h b/searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.h
new file mode 100644
index 00000000000..b2b6d578628
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/numeric_sort_blob_writer.h
@@ -0,0 +1,25 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "sort_blob_writer.h"
+#include <optional>
+
+namespace search::attribute {
+
+/*
+ * Class for writing numeric sort blobs for arrays and
+ * weighted sets of type T with ascending or descending
+ * sort order.
+ */
+template <typename T, bool asc>
+class NumericSortBlobWriter : public SortBlobWriter {
+ std::optional<T> _best;
+public:
+ NumericSortBlobWriter() noexcept;
+ ~NumericSortBlobWriter() noexcept;
+ void candidate(T val);
+ long write(void *serTo, size_t available);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/sort_blob_writer.h b/searchlib/src/vespa/searchlib/attribute/sort_blob_writer.h
new file mode 100644
index 00000000000..31aca300349
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/sort_blob_writer.h
@@ -0,0 +1,21 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+/*
+ * Base class for sort blob writers. Contains definition of tags
+ * describing if value is present (any present value sorts before
+ * a missing value).
+ */
+class SortBlobWriter {
+public:
+ /*
+ * Missing value is always sorted last.
+ */
+ static constexpr unsigned char has_value = 0;
+ static constexpr unsigned char missing_value = 1;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.cpp b/searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.cpp
new file mode 100644
index 00000000000..dad1df35847
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.cpp
@@ -0,0 +1,72 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "string_sort_blob_writer.h"
+#include <vespa/searchcommon/common/iblobconverter.h>
+#include <vespa/vespalib/util/arrayref.h>
+#include <algorithm>
+#include <cstring>
+
+namespace search::attribute {
+
+StringSortBlobWriter::StringSortBlobWriter(void* serialize_to, size_t available, const BlobConverter* bc, bool asc) noexcept
+ : _best_size(),
+ _serialize_to(static_cast<unsigned char*>(serialize_to)),
+ _available(available),
+ _bc(bc),
+ _asc(asc)
+{
+}
+
+StringSortBlobWriter::~StringSortBlobWriter() noexcept = default;
+
+bool
+StringSortBlobWriter::candidate(const char* val)
+{
+ int size = std::strlen(val) + 1;
+ vespalib::ConstBufferRef buf(val, size);
+ if (_bc != nullptr) {
+ buf = _bc->convert(buf);
+ }
+ if (_best_size.has_value()) {
+ auto common_size = std::min(_best_size.value(), buf.size());
+ auto cmpres = std::memcmp(_serialize_to + 1, buf.data(), common_size);
+ if (_asc) {
+ if (cmpres < 0 || (cmpres == 0 && _best_size.value() < buf.size())) {
+ return true;
+ }
+ } else {
+ if (cmpres > 0 || (cmpres == 0 && _best_size.value() > buf.size())) {
+ return true;
+ }
+ }
+ }
+ if (_available < buf.size() + 1) {
+ return false;
+ }
+ _serialize_to[0] = has_value;
+ memcpy(_serialize_to + 1, buf.data(), buf.size());
+ _best_size = buf.size();
+ return true;
+}
+
+long
+StringSortBlobWriter::write()
+{
+ if (_best_size.has_value()) {
+ if (!_asc) {
+ vespalib::ArrayRef<unsigned char> buf(_serialize_to + 1, _best_size.value());
+ for (auto& c : buf) {
+ c = 0xff - c;
+ }
+ }
+ return 1 + _best_size.value();
+ } else {
+ if (_available < 1) {
+ return -1;
+ }
+ _serialize_to[0] = missing_value;
+ return 1;
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.h b/searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.h
new file mode 100644
index 00000000000..12b1d871875
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/string_sort_blob_writer.h
@@ -0,0 +1,31 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "sort_blob_writer.h"
+#include <optional>
+
+namespace search::common { class BlobConverter; }
+
+namespace search::attribute {
+
+/*
+ * Class for writing numeric sort blobs for arrays and
+ * weighted sets of string with ascending or descending
+ * sort order.
+ */
+class StringSortBlobWriter : public SortBlobWriter {
+ using BlobConverter = common::BlobConverter;
+ std::optional<size_t> _best_size;
+ unsigned char* _serialize_to;
+ size_t _available;
+ const BlobConverter* _bc;
+ const bool _asc;
+public:
+ StringSortBlobWriter(void* serialize_to, size_t available, const BlobConverter* bc, bool asc) noexcept;
+ ~StringSortBlobWriter() noexcept;
+ bool candidate(const char* val);
+ long write();
+};
+
+}