about summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2023-03-21 21:23:29 +0100
committer Tor Egge <Tor.Egge@online.no> 2023-03-21 21:23:29 +0100
commit a74fc174fd55f39aaaf4fbe1d68ce5a2e7719999 (patch)
tree 43dca1c0328119b4bf7e559ebc37aae4ff20f052 /searchlib
parent 4100c3f0f060f91e8fc595e8f1c47520aa807ec9 (diff)
Adjust serialized sort data for raw attributes.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp 25
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp 59
2 files changed, 61 insertions, 23 deletions
diff --git a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
index 9f728cc0482..2278b49e31b 100644
--- a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
+++ b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp
@@ -86,20 +86,31 @@ TEST_F(RawAttributeTest, can_set_and_clear_value)
}
TEST_F(RawAttributeTest, implements_serialize_for_sort) {
+ std::vector<char> escapes{1, 0, char(0xff), char(0xfe), 1};
vespalib::string long_hello("hello, is there anybody out there");
vespalib::ConstArrayRef<char> raw_long_hello(long_hello.c_str(), long_hello.size());
uint8_t buf[8];
memset(buf, 0, sizeof(buf));
_attr->addDocs(10);
_attr->commit();
- EXPECT_EQ(0, _attr->serializeForAscendingSort(1, buf, sizeof(buf)));
- EXPECT_EQ(0, _attr->serializeForDescendingSort(1, buf, sizeof(buf)));
+ EXPECT_EQ(1, _attr->serializeForAscendingSort(1, buf, sizeof(buf)));
+ EXPECT_EQ(0, buf[0]);
+ EXPECT_EQ(1, _attr->serializeForDescendingSort(1, buf, sizeof(buf)));
+ EXPECT_EQ(0xff, buf[0]);
_raw->set_raw(1, raw_hello);
- EXPECT_EQ(5, _attr->serializeForAscendingSort(1, buf, sizeof(buf)));
- EXPECT_EQ(0, memcmp("hello", buf, 5));
- EXPECT_EQ(5, _attr->serializeForDescendingSort(1, buf, sizeof(buf)));
- uint8_t expected [] = {0xff-'h', 0xff-'e', 0xff-'l', 0xff-'l', 0xff-'o'};
- EXPECT_EQ(0, memcmp(expected, buf, 5));
+ EXPECT_EQ(6, _attr->serializeForAscendingSort(1, buf, sizeof(buf)));
+ uint8_t hello_asc[] = {0x01+'h', 0x01+'e', 0x01+'l', 0x01+'l', 0x01+'o', 0x00};
+ EXPECT_EQ(0, memcmp(hello_asc, buf, 6));
+ EXPECT_EQ(6, _attr->serializeForDescendingSort(1, buf, sizeof(buf)));
+ uint8_t hello_desc[] = {0xfe -'h', 0xfe -'e', 0xfe -'l', 0xfe -'l', 0xfe -'o', 0xff};
+ EXPECT_EQ(0, memcmp(hello_desc, buf, 6));
+ _raw->set_raw(1, escapes);
+ EXPECT_EQ(8, _attr->serializeForAscendingSort(1, buf, sizeof(buf)));
+ uint8_t escapes_asc[] = {0x02, 0x01, 0xff, 0xff, 0xff, 0xfe, 0x02, 0x00};
+ EXPECT_EQ(0, memcmp(escapes_asc, buf, 8));
+ EXPECT_EQ(8, _attr->serializeForDescendingSort(1, buf, sizeof(buf)));
+ uint8_t escapes_desc[] = {0xfd, 0xfe, 0x00, 0x00, 0x00, 0x01, 0xfd, 0xff};
+ EXPECT_EQ(0, memcmp(escapes_desc, buf, 8));
_raw->set_raw(1, raw_long_hello);
EXPECT_EQ(-1, _attr->serializeForAscendingSort(1, buf, sizeof(buf)));
EXPECT_EQ(-1, _attr->serializeForDescendingSort(1, buf, sizeof(buf)));
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp
index 65841f59827..f8c6768ba5b 100644
--- a/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "raw_attribute.h"
+#include <cassert>
namespace search::attribute {
@@ -11,32 +12,58 @@ RawAttribute::RawAttribute(const vespalib::string& name, const Config& config)
RawAttribute::~RawAttribute() = default;
-long
-RawAttribute::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
+namespace {
+
+template <bool desc>
+unsigned char remap(unsigned char val)
{
- auto raw = get_raw(doc);
- if (available >= (long)raw.size()) {
- memcpy(serTo, raw.data(), raw.size());
+ return (desc ? (0xff - val) : val);
+}
+
+template <bool desc>
+long serialize_for_sort(vespalib::ConstArrayRef<char> raw, void* serTo, long available)
+{
+ auto src = reinterpret_cast<const unsigned char *>(raw.data());
+ auto src_end = src + raw.size();
+ size_t extra = 1;
+ for (auto p = src; p != src_end; ++p) {
+ if (*p >= 0xfe) {
+ ++extra;
+ }
+ }
+ if (available >= (long)(raw.size() + extra)) {
+ auto dst = static_cast<unsigned char *>(serTo);
+ auto dst_orig = dst;
+ for (auto p = src; p != src_end; ++p) {
+ if (*p >= 0xfe) {
+ *dst++ = remap<desc>(0xff);
+ *dst++ = remap<desc>(*p);
+ } else {
+ *dst++ = remap<desc>(*p + 1);
+ }
+ }
+ *dst++ = remap<desc>(0);
+ assert(raw.size() + extra + dst_orig == dst);
} else {
return -1;
}
- return raw.size();
+ return raw.size() + extra;
+}
+
+}
+
+long
+RawAttribute::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
+{
+ auto raw = get_raw(doc);
+ return serialize_for_sort<false>(raw, serTo, available);
}
long
RawAttribute::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
{
auto raw = get_raw(doc);
- if (available >= (long)raw.size()) {
- auto *dst = static_cast<unsigned char *>(serTo);
- const auto * src(reinterpret_cast<const uint8_t *>(raw.data()));
- for (size_t i(0); i < raw.size(); ++i) {
- dst[i] = 0xff - src[i];
- }
- } else {
- return -1;
- }
- return raw.size();
+ return serialize_for_sort<true>(raw, serTo, available);
}
}