diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-03-21 21:23:29 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-03-21 21:23:29 +0100 |
commit | a74fc174fd55f39aaaf4fbe1d68ce5a2e7719999 (patch) | |
tree | 43dca1c0328119b4bf7e559ebc37aae4ff20f052 /searchlib | |
parent | 4100c3f0f060f91e8fc595e8f1c47520aa807ec9 (diff) |
Adjust serialized sort data for raw attributes.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp | 25 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp | 59 |
2 files changed, 61 insertions, 23 deletions
diff --git a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp index 9f728cc0482..2278b49e31b 100644 --- a/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp +++ b/searchlib/src/tests/attribute/raw_attribute/raw_attribute_test.cpp @@ -86,20 +86,31 @@ TEST_F(RawAttributeTest, can_set_and_clear_value) } TEST_F(RawAttributeTest, implements_serialize_for_sort) { + std::vector<char> escapes{1, 0, char(0xff), char(0xfe), 1}; vespalib::string long_hello("hello, is there anybody out there"); vespalib::ConstArrayRef<char> raw_long_hello(long_hello.c_str(), long_hello.size()); uint8_t buf[8]; memset(buf, 0, sizeof(buf)); _attr->addDocs(10); _attr->commit(); - EXPECT_EQ(0, _attr->serializeForAscendingSort(1, buf, sizeof(buf))); - EXPECT_EQ(0, _attr->serializeForDescendingSort(1, buf, sizeof(buf))); + EXPECT_EQ(1, _attr->serializeForAscendingSort(1, buf, sizeof(buf))); + EXPECT_EQ(0, buf[0]); + EXPECT_EQ(1, _attr->serializeForDescendingSort(1, buf, sizeof(buf))); + EXPECT_EQ(0xff, buf[0]); _raw->set_raw(1, raw_hello); - EXPECT_EQ(5, _attr->serializeForAscendingSort(1, buf, sizeof(buf))); - EXPECT_EQ(0, memcmp("hello", buf, 5)); - EXPECT_EQ(5, _attr->serializeForDescendingSort(1, buf, sizeof(buf))); - uint8_t expected [] = {0xff-'h', 0xff-'e', 0xff-'l', 0xff-'l', 0xff-'o'}; - EXPECT_EQ(0, memcmp(expected, buf, 5)); + EXPECT_EQ(6, _attr->serializeForAscendingSort(1, buf, sizeof(buf))); + uint8_t hello_asc[] = {0x01+'h', 0x01+'e', 0x01+'l', 0x01+'l', 0x01+'o', 0x00}; + EXPECT_EQ(0, memcmp(hello_asc, buf, 6)); + EXPECT_EQ(6, _attr->serializeForDescendingSort(1, buf, sizeof(buf))); + uint8_t hello_desc[] = {0xfe -'h', 0xfe -'e', 0xfe -'l', 0xfe -'l', 0xfe -'o', 0xff}; + EXPECT_EQ(0, memcmp(hello_desc, buf, 6)); + _raw->set_raw(1, escapes); + EXPECT_EQ(8, _attr->serializeForAscendingSort(1, buf, sizeof(buf))); + uint8_t escapes_asc[] = {0x02, 0x01, 0xff, 0xff, 0xff, 0xfe, 0x02, 0x00}; + EXPECT_EQ(0, memcmp(escapes_asc, buf, 8)); + EXPECT_EQ(8, _attr->serializeForDescendingSort(1, buf, sizeof(buf))); + uint8_t escapes_desc[] = {0xfd, 0xfe, 0x00, 0x00, 0x00, 0x01, 0xfd, 0xff}; + EXPECT_EQ(0, memcmp(escapes_desc, buf, 8)); _raw->set_raw(1, raw_long_hello); EXPECT_EQ(-1, _attr->serializeForAscendingSort(1, buf, sizeof(buf))); EXPECT_EQ(-1, _attr->serializeForDescendingSort(1, buf, sizeof(buf))); diff --git a/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp index 65841f59827..f8c6768ba5b 100644 --- a/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "raw_attribute.h" +#include <cassert> namespace search::attribute { @@ -11,32 +12,58 @@ RawAttribute::RawAttribute(const vespalib::string& name, const Config& config) RawAttribute::~RawAttribute() = default; -long -RawAttribute::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const +namespace { + +template <bool desc> +unsigned char remap(unsigned char val) { - auto raw = get_raw(doc); - if (available >= (long)raw.size()) { - memcpy(serTo, raw.data(), raw.size()); + return (desc ? (0xff - val) : val); +} + +template <bool desc> +long serialize_for_sort(vespalib::ConstArrayRef<char> raw, void* serTo, long available) +{ + auto src = reinterpret_cast<const unsigned char *>(raw.data()); + auto src_end = src + raw.size(); + size_t extra = 1; + for (auto p = src; p != src_end; ++p) { + if (*p >= 0xfe) { + ++extra; + } + } + if (available >= (long)(raw.size() + extra)) { + auto dst = static_cast<unsigned char *>(serTo); + auto dst_orig = dst; + for (auto p = src; p != src_end; ++p) { + if (*p >= 0xfe) { + *dst++ = remap<desc>(0xff); + *dst++ = remap<desc>(*p); + } else { + *dst++ = remap<desc>(*p + 1); + } + } + *dst++ = remap<desc>(0); + assert(raw.size() + extra + dst_orig == dst); } else { return -1; } - return raw.size(); + return raw.size() + extra; +} + +} + +long +RawAttribute::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const +{ + auto raw = get_raw(doc); + return serialize_for_sort<false>(raw, serTo, available); } long RawAttribute::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const { auto raw = get_raw(doc); - if (available >= (long)raw.size()) { - auto *dst = static_cast<unsigned char *>(serTo); - const auto * src(reinterpret_cast<const uint8_t *>(raw.data())); - for (size_t i(0); i < raw.size(); ++i) { - dst[i] = 0xff - src[i]; - } - } else { - return -1; - } - return raw.size(); + return serialize_for_sort<true>(raw, serTo, available); } } |