diff options
14 files changed, 452 insertions, 26 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 8626f2b6291..ab4074bbb4e 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -70,7 +70,7 @@ public class Flags { public static final UnboundBooleanFlag KEEP_STORAGE_NODE_UP = defineFeatureFlag( "keep-storage-node-up", true, - List.of("hakonhall"), "2022-07-07", "2022-11-07", + List.of("hakonhall"), "2022-07-07", "2022-12-07", "Whether to leave the storage node (with wanted state) UP while the node is permanently down.", "Takes effect immediately for nodes transitioning to permanently down.", ZONE_ID, APPLICATION_ID); diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 4b9c23ea5d3..6f4ffc31741 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -679,7 +679,7 @@ getExpectedBlueprint() " estHits: 9\n" " cost_tier: 1\n" " tree_size: 2\n" - " allow_termwise_eval: 0\n" + " allow_termwise_eval: false\n" " }\n" " sourceId: 4294967295\n" " docid_limit: 0\n" @@ -698,7 +698,7 @@ getExpectedBlueprint() " estHits: 9\n" " cost_tier: 1\n" " tree_size: 1\n" - " allow_termwise_eval: 1\n" + " allow_termwise_eval: true\n" " }\n" " sourceId: 4294967295\n" " docid_limit: 0\n" @@ -727,7 +727,7 @@ getExpectedSlimeBlueprint() { " estHits: 9," " cost_tier: 1," " tree_size: 2," - " allow_termwise_eval: 0" + " allow_termwise_eval: false" " }," " sourceId: 4294967295," " docid_limit: 0," @@ -751,7 +751,7 @@ getExpectedSlimeBlueprint() { " estHits: 9," " cost_tier: 1," " tree_size: 1," - " allow_termwise_eval: 1" + " allow_termwise_eval: true" " }," " sourceId: 4294967295," " docid_limit: 0" diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp 
b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index 1ac91c5d8d6..55ca42f7369 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -595,7 +595,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) " estHits: 2\n" " cost_tier: 1\n" " tree_size: 2\n" - " allow_termwise_eval: 0\n" + " allow_termwise_eval: false\n" " }\n" " sourceId: 4294967295\n" " docid_limit: 0\n" @@ -617,7 +617,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) " estHits: 2\n" " cost_tier: 1\n" " tree_size: 1\n" - " allow_termwise_eval: 1\n" + " allow_termwise_eval: true\n" " }\n" " sourceId: 4294967295\n" " docid_limit: 0\n" diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 179296ff0f9..692b86fdc75 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -45,6 +45,8 @@ #include <vespa/log/log.h> LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory"); +using search::attribute::BasicType; +using search::attribute::CollectionType; using search::attribute::IAttributeVector; using search::attribute::ISearchContext; using search::fef::TermFieldMatchData; @@ -52,6 +54,7 @@ using search::fef::TermFieldMatchDataArray; using search::fef::TermFieldMatchDataPosition; using search::query::Location; using search::query::LocationTerm; +using search::query::MultiTerm; using search::query::Node; using search::query::NumberTerm; using search::query::PredicateQuery; @@ -62,7 +65,6 @@ using search::query::StackDumpCreator; using search::query::StringTerm; using search::query::SubstringTerm; using search::query::SuffixTerm; -using search::query::MultiTerm; using 
search::queryeval::AndBlueprint; using search::queryeval::AndSearchStrict; using search::queryeval::Blueprint; @@ -84,11 +86,11 @@ using search::queryeval::SimpleLeafBlueprint; using search::queryeval::WeightedSetTermBlueprint; using search::tensor::DenseTensorAttribute; using search::tensor::ITensorAttribute; +using vespalib::Issue; using vespalib::geo::ZCurve; using vespalib::make_string; using vespalib::string; using vespalib::stringref; -using vespalib::Issue; namespace search { namespace { @@ -116,6 +118,7 @@ private: class AttributeFieldBlueprint : public SimpleLeafBlueprint { private: + const IAttributeVector& _attr; // Must take a copy of the query term for visitMembers() // as only a few ISearchContext implementations exposes the query term. vespalib::string _query_term; @@ -126,6 +129,7 @@ private: AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute, QueryTermSimple::UP term, const attribute::SearchContextParams &params) : SimpleLeafBlueprint(field), + _attr(attribute), _query_term(term->getTermString()), _search_context(attribute.createSearchContext(std::move(term), params)), _type(OTHER) @@ -195,11 +199,39 @@ public: bool getRange(vespalib::string &from, vespalib::string &to) const override; }; +namespace { + +vespalib::string +get_type(const IAttributeVector& attr) +{ + auto coll_type = CollectionType(attr.getCollectionType()); + auto basic_type = BasicType(attr.getBasicType()); + if (coll_type.type() == CollectionType::SINGLE) { + return basic_type.asString(); + } + std::ostringstream oss; + oss << coll_type.asString() << "<" << basic_type.asString() << ">"; + return oss.str(); +} + +void +visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr) +{ + visitor.openStruct("attribute", "IAttributeVector"); + visitor.visitString("name", attr.getName()); + visitor.visitString("type", get_type(attr)); + visitor.visitBool("fast_search", attr.getIsFastSearch()); + visitor.visitBool("filter", attr.getIsFilter()); 
+ visitor.closeStruct(); +} + +} + void AttributeFieldBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const { LeafBlueprint::visitMembers(visitor); - visit(visitor, "attribute", _search_context->attributeName()); + visit_attribute(visitor, _attr); visit(visitor, "query_term", _query_term); } @@ -275,6 +307,11 @@ public: search->fetchPostings(execInfo); } } + + void visitMembers(vespalib::ObjectVisitor& visitor) const override { + LeafBlueprint::visitMembers(visitor); + visit_attribute(visitor, _attribute); + } }; LocationPreFilterBlueprint::~LocationPreFilterBlueprint() = default; @@ -325,6 +362,10 @@ public: SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const override { return create_default_filter(strict, constraint); } + void visitMembers(vespalib::ObjectVisitor& visitor) const override { + LeafBlueprint::visitMembers(visitor); + visit_attribute(visitor, _attribute); + } }; //----------------------------------------------------------------------------- @@ -436,6 +477,10 @@ public: return {}; } } + void visitMembers(vespalib::ObjectVisitor& visitor) const override { + LeafBlueprint::visitMembers(visitor); + visit_attribute(visitor, _iattr); + } }; template <typename SearchType> @@ -623,7 +668,7 @@ public: void visitMembers(vespalib::ObjectVisitor &visitor) const override { LeafBlueprint::visitMembers(visitor); - visit(visitor, "attribute", _attrName); + visit_attribute(visitor, _iattr); } std::unique_ptr<queryeval::MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const override { if (fields.has_field(_attrName)) { diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 9c9df6b82fe..91aa308f008 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -336,7 +336,7 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const 
visitor.visitInt("estHits", state.estimate().estHits); visitor.visitInt("cost_tier", state.cost_tier()); visitor.visitInt("tree_size", state.tree_size()); - visitor.visitInt("allow_termwise_eval", state.allow_termwise_eval()); + visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval()); visitor.closeStruct(); visitor.visitInt("sourceId", _sourceId); visitor.visitInt("docid_limit", _docid_limit); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp index 922c2fefa28..df732d3ab24 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp @@ -68,7 +68,7 @@ is_present(uint8_t presence_flag) { class IndexBuilder { public: virtual ~IndexBuilder() = default; - virtual void add(uint32_t lid, EntryRef ref) = 0; + virtual void add(uint32_t lid) = 0; virtual void wait_complete() = 0; }; @@ -78,7 +78,7 @@ public: */ class ThreadedIndexBuilder : public IndexBuilder { public: - ThreadedIndexBuilder(AttributeVector& attr, vespalib::GenerationHandler& generation_handler, TensorStore& store, NearestNeighborIndex& index, vespalib::Executor& shared_executor) + ThreadedIndexBuilder(TensorAttribute& attr, vespalib::GenerationHandler& generation_handler, TensorStore& store, NearestNeighborIndex& index, vespalib::Executor& shared_executor) : _attr(attr), _generation_handler(generation_handler), _store(store), @@ -87,7 +87,7 @@ public: _queue(MAX_PENDING), _pending(0) {} - void add(uint32_t lid, EntryRef ref) override; + void add(uint32_t lid) override; void wait_complete() override { drainUntilPending(0); } @@ -134,7 +134,7 @@ private: } } static constexpr uint32_t MAX_PENDING = 1000; - AttributeVector& _attr; + TensorAttribute& _attr; const vespalib::GenerationHandler& _generation_handler; TensorStore& _store; NearestNeighborIndex& _index; @@ -146,7 +146,7 @@ private: }; void 
-ThreadedIndexBuilder::add(uint32_t lid, EntryRef ref) { +ThreadedIndexBuilder::add(uint32_t lid) { Entry item; while (pop(item)) { // First process items that are ready to complete @@ -157,9 +157,8 @@ ThreadedIndexBuilder::add(uint32_t lid, EntryRef ref) { // Then we can issue a new one ++_pending; - auto dense_store = _store.as_dense(); - auto task = vespalib::makeLambdaTask([this, ref, lid, dense_store]() { - auto prepared = _index.prepare_add_document(lid, dense_store->get_vectors(ref), + auto task = vespalib::makeLambdaTask([this, lid]() { + auto prepared = _index.prepare_add_document(lid, _attr.get_vectors(lid), _generation_handler.takeGuard()); std::unique_lock guard(_mutex); _queue.push(std::make_pair(lid, std::move(prepared))); @@ -177,7 +176,7 @@ public: _index(index) { } - void add(uint32_t lid, EntryRef) override { + void add(uint32_t lid) override { _index.add_document(lid); if ((lid % LOAD_COMMIT_INTERVAL) == 0) { _attr.commit(); @@ -193,7 +192,7 @@ private: } -TensorAttributeLoader::TensorAttributeLoader(AttributeVector& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index) +TensorAttributeLoader::TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index) : _attr(attr), _generation_handler(generation_handler), _ref_vector(ref_vector), @@ -261,7 +260,7 @@ TensorAttributeLoader::build_index(vespalib::Executor* executor, uint32_t docid_ for (uint32_t lid = 0; lid < docid_limit; ++lid) { auto ref = _ref_vector[lid].load_relaxed(); if (ref.valid()) { - builder->add(lid, ref); + builder->add(lid); } } builder->wait_complete(); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h index 97add17d8f3..9417737cec5 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h +++ 
b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h @@ -5,7 +5,6 @@ #include <vespa/vespalib/datastore/atomic_entry_ref.h> #include <vespa/vespalib/util/rcuvector.h> -namespace search { class AttributeVector; } namespace vespalib { class Executor; } namespace search::tensor { @@ -13,6 +12,7 @@ namespace search::tensor { class BlobSequenceReader; class DenseTensorStore; class NearestNeighborIndex; +class TensorAttribute; class TensorStore; /** @@ -23,7 +23,7 @@ class TensorAttributeLoader { using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; using GenerationHandler = vespalib::GenerationHandler; using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>; - AttributeVector& _attr; + TensorAttribute& _attr; GenerationHandler& _generation_handler; RefVector& _ref_vector; TensorStore& _store; @@ -35,7 +35,7 @@ class TensorAttributeLoader { bool load_index(); public: - TensorAttributeLoader(AttributeVector& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index); + TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index); ~TensorAttributeLoader(); bool on_load(vespalib::Executor* executor); }; diff --git a/security-utils/src/main/java/com/yahoo/security/Base58.java b/security-utils/src/main/java/com/yahoo/security/Base58.java new file mode 100644 index 00000000000..3010bc878a8 --- /dev/null +++ b/security-utils/src/main/java/com/yahoo/security/Base58.java @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.security; + +/** + * Base58 encoding using the alphabet standardized by Bitcoin et al., which avoids + * the use of characters [0OIl] to avoid visual ambiguity. 
It does not feature any + * potential word/line-breaking characters, which means encoded strings can usually + * be selected in one go on web pages or in the terminal. + * + * @see <a href="https://en.wikipedia.org/wiki/Base58">Base58 on Wiki</a> + * + * @author vekterli + */ +public class Base58 { + + private static final BaseNCodec INSTANCE = BaseNCodec.of("123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"); + + public static BaseNCodec codec() { + return INSTANCE; + } + +} diff --git a/security-utils/src/main/java/com/yahoo/security/Base62.java b/security-utils/src/main/java/com/yahoo/security/Base62.java new file mode 100644 index 00000000000..86c60a1bb1d --- /dev/null +++ b/security-utils/src/main/java/com/yahoo/security/Base62.java @@ -0,0 +1,21 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.security; + +/** + * Base62 encoding which has the nice property that it does not feature any + * potential word/line-breaking characters, which means encoded strings can + * usually be selected in one go on web pages or in the terminal. + * + * @see <a href="https://en.wikipedia.org/wiki/Base62">Base62 on Wiki</a> + * + * @author vekterli + */ +public class Base62 { + + private static final BaseNCodec INSTANCE = BaseNCodec.of("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + + public static BaseNCodec codec() { + return INSTANCE; + } + +} diff --git a/security-utils/src/main/java/com/yahoo/security/BaseNCodec.java b/security-utils/src/main/java/com/yahoo/security/BaseNCodec.java new file mode 100644 index 00000000000..0921f238460 --- /dev/null +++ b/security-utils/src/main/java/com/yahoo/security/BaseNCodec.java @@ -0,0 +1,151 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.security; + +import java.math.BigInteger; +import java.util.Arrays; + +/** + * <p> + * Codec that enables easy conversion from an array of bytes to any numeric base in [2, 256) + * and back again, using a supplied custom alphabet. + * </p> + * <p> + * Implemented by treating the input byte sequence to encode verbatim as a big-endian + * <code>BigInteger</code> and iteratively doing a <code>divmod</code> operation until + * the quotient is zero, emitting the modulus mapped onto the alphabet for each iteration. + * </p> + * <p> + * Decoding reverses this process, ending up with the same <code>BigInteger</code> as in + * the initial encoding step. + * </p> + * <p> + * Note that <code>BigInteger</code>s represent the <em>canonical</em> form of any given + * integer, which means that leading zero bytes are implicitly ignored. We therefore + * special-case this by unary-coding the number of leading zeroes in the encoded form, + * where a leading zero byte is mapped to the <em>first</em> character of the alphabet. + * </p> + * <p>Example for Base58, which starts its alphabet with 1 (0 is not present):</p> + * <pre> + * "Hello World!" = "2NEpo7TZRRrLZSi2U" + * "\0\0Hello World!" = "112NEpo7TZRRrLZSi2U" (note leading 1s) + * </pre> + * <p>Example for Base62, which starts its alphabet with 0:</p> + * <pre> + * "Hello World!" = "T8dgcjRGkZ3aysdN" + * "\0\0Hello World!" = "00T8dgcjRGkZ3aysdN" (note leading 0s) + * </pre> + * <p> + * <strong>Important:</strong> runtime complexity is <em>O(n<sup>2</sup>)</em> for both + * encoding and decoding, so this should only be used to encode/decode relatively short + * byte sequences. This is <em>not</em> a replacement for Base64 etc. encoding that runs + * in linear time! In addition, a <code>BaseNCodec</code> with a Base64 alphabet encodes + * to a completely different output than a regular Base64 encoder when the input is not + * evenly divisible by three. 
This is due to regular Base64 explicitly handling padding, + * while this codec does not. + * </p> + * + * @author vekterli + */ +public class BaseNCodec { + + public static final int MAX_BASE = 255; /** Inclusive */ + + private static class Alphabet { + final char[] alphabetChars; + final int[] reverseLut; + + Alphabet(String alphabetIn) { + if (alphabetIn.length() < 2) { // We don't do unary... + throw new IllegalArgumentException("Alphabet requires at least two symbols"); + } + if (alphabetIn.length() > MAX_BASE) { + throw new IllegalArgumentException("Alphabet size too large"); + } + alphabetChars = alphabetIn.toCharArray(); + int highestChar = Integer.MIN_VALUE; + for (char ch : alphabetChars) { + highestChar = Math.max(highestChar, ch); + } + reverseLut = new int[highestChar + 1]; + Arrays.fill(reverseLut, -1); // -1 => invalid mapping + for (int i = 0; i < alphabetChars.length; ++i) { + if (reverseLut[alphabetChars[i]] != -1) { + throw new IllegalArgumentException("Alphabet character '%c' occurs more than once" + .formatted(alphabetChars[i])); + } + reverseLut[alphabetChars[i]] = i; + } + } + } + + private static final BigInteger BN_ZERO = BigInteger.valueOf(0); + + private final Alphabet alphabet; + private final BigInteger alphabetLenBN; + + private BaseNCodec(String alphabet) { + this.alphabet = new Alphabet(alphabet); + this.alphabetLenBN = BigInteger.valueOf(this.alphabet.alphabetChars.length); + } + + public static BaseNCodec of(String alphabet) { + return new BaseNCodec(alphabet); + } + + public int base() { return this.alphabet.alphabetChars.length; } + + public String encode(byte[] input) { + var sb = new StringBuilder(input.length * 2); // Not at all exact, but builder can resize anyway + var num = new BigInteger(1, input); // Treat as _positive_ big endian bigint (explicit signum=1) + // Standard base N digit conversion loop. Note: emits in reverse order since we + // append the least significant digit first. We reverse this later on. 
+ while (!num.equals(BN_ZERO)) { + BigInteger[] quotRem = num.divideAndRemainder(alphabetLenBN); + num = quotRem[0]; + sb.append(alphabet.alphabetChars[quotRem[1].intValue()]); + } + for (byte leadingByte : input) { + if (leadingByte != 0x00) { + break; + } + sb.append(alphabet.alphabetChars[0]); + } + return sb.reverse().toString(); + } + + public byte[] decode(String input) { + char[] inputChars = input.toCharArray(); + int prefixNulls = 0; + for (char leadingChar : inputChars) { + if (leadingChar != alphabet.alphabetChars[0]) { + break; + } + ++prefixNulls; + } + // Restore the BigInteger representation by reversing the base conversion done during encoding. + var accu = BN_ZERO; + for (char c : inputChars) { + int idx = (c < alphabet.reverseLut.length) ? alphabet.reverseLut[c] : -1; + if (idx == -1) { + throw new IllegalArgumentException("Input character not part of codec alphabet"); + } + accu = accu.multiply(alphabetLenBN).add(BigInteger.valueOf(idx)); + } + byte[] bnBytes = accu.toByteArray(); + // If the most significant bigint byte is zero, it means the most significant bit of the + // next byte is 1 (or the bnBytes length is 1, in which case prefixNulls == 1) and the bigint + // representation uses 1 extra byte to be positive in 2's complement. If so, prune it away + // to avoid prefixing with a spurious null-byte. + boolean msbZero = (bnBytes[0] == 0x0); + if (prefixNulls == 0 && !msbZero) { + return bnBytes; + } else { + int realLen = (msbZero ? bnBytes.length - 1 : bnBytes.length); + byte[] result = new byte[prefixNulls + realLen]; + // #prefixNulls prefix bytes are implicitly zero + System.arraycopy(bnBytes, (msbZero ? 
1 : 0), result, prefixNulls, realLen); + return result; + } + } + +} diff --git a/security-utils/src/test/java/com/yahoo/security/BaseNCodecTest.java b/security-utils/src/test/java/com/yahoo/security/BaseNCodecTest.java new file mode 100644 index 00000000000..da67ea2dff3 --- /dev/null +++ b/security-utils/src/test/java/com/yahoo/security/BaseNCodecTest.java @@ -0,0 +1,122 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.security; + +import org.junit.jupiter.api.Test; + +import java.math.BigInteger; + +import static com.yahoo.security.ArrayUtils.hex; +import static com.yahoo.security.ArrayUtils.toUtf8Bytes; +import static com.yahoo.security.ArrayUtils.unhex; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * @author vekterli + */ +public class BaseNCodecTest { + + private static void verifyRoundtrip(BaseNCodec codec, byte[] bytes, String expectedEncoded) { + String enc = codec.encode(bytes); + assertEquals(expectedEncoded, enc); + byte[] dec = codec.decode(enc); + assertEquals(hex(bytes), hex(dec)); + } + + private static void verifyRoundtrip(BaseNCodec codec, String str, String expectedEncoded) { + verifyRoundtrip(codec, toUtf8Bytes(str), expectedEncoded); + } + + @Test + void decoding_chars_not_in_alphabet_throws() { + var b58 = Base58.codec(); + // [0OIl] are not in Base58 alphabet, but within the alphabet LUT range + assertThrows(IllegalArgumentException.class, () -> b58.decode("233QC0")); + // '{' is one beyond 'z', which is the highest char in the LUT range + assertThrows(IllegalArgumentException.class, () -> b58.decode("233QC{")); + } + + @Test + void alphabet_char_duplication_during_codec_setup_throws() { + assertThrows(IllegalArgumentException.class, () -> BaseNCodec.of("abcda")); + } + + @Test + void base58_codec_test_cases_pass() { + var b58 = Base58.codec(); + assertEquals(58, b58.base()); + // 
https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 test vectors: + verifyRoundtrip(b58, "Hello World!", "2NEpo7TZRRrLZSi2U"); + verifyRoundtrip(b58, "The quick brown fox jumps over the lazy dog.", + "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z"); + verifyRoundtrip(b58, unhex("0000287fb4cd"), "11233QC4"); + + // Values that have been cross-referenced with other encoder implementations: + verifyRoundtrip(b58, "", ""); + verifyRoundtrip(b58, unhex("00"), "1"); + verifyRoundtrip(b58, unhex("0000"), "11"); + verifyRoundtrip(b58, unhex("ff"), "5Q"); + verifyRoundtrip(b58, unhex("00ff"), "15Q"); + verifyRoundtrip(b58, unhex("ff00"), "LQX"); + verifyRoundtrip(b58, unhex("ffffff"), "2UzHL"); + verifyRoundtrip(b58, unhex("287fb4cd"), "233QC4"); + } + + @Test + void base62_codec_test_cases_pass() { + var b62 = Base62.codec(); + assertEquals(62, b62.base()); + verifyRoundtrip(b62, "Hello World!", "T8dgcjRGkZ3aysdN"); + verifyRoundtrip(b62, "\0\0Hello World!", "00T8dgcjRGkZ3aysdN"); + verifyRoundtrip(b62, "", ""); + verifyRoundtrip(b62, unhex("00"), "0"); + verifyRoundtrip(b62, unhex("0000"), "00"); + verifyRoundtrip(b62, unhex("00000000ffffffff"), "00004gfFC3"); + verifyRoundtrip(b62, unhex("ffffffff00000000"), "LygHZwPV2MC"); + } + + // Test with some common bases that are easier to reason about: + + @Test + void codec_generalizes_down_to_base_10() { + var b10 = BaseNCodec.of("0123456789"); + verifyRoundtrip(b10, unhex("00"), "0"); + verifyRoundtrip(b10, unhex("000f"), "015"); + verifyRoundtrip(b10, unhex("ffff"), "65535"); + + // A large prime number: 2^252 + 27742317777372353535851937790883648493 (Curve25519 order) + var numStr = "7237005577332262213973186563042994240857116359379907606001950938285454250989"; + var numBN = new BigInteger(numStr); + verifyRoundtrip(b10, numBN.toByteArray(), numStr); + } + + // Possibly world's most inefficient hex conversion? 
+ @Test + void codec_generalizes_down_to_base_16() { + var b2 = BaseNCodec.of("0123456789ABCDEF"); + assertEquals(16, b2.base()); + verifyRoundtrip(b2, unhex(""), ""); + verifyRoundtrip(b2, unhex("00"), "0"); + verifyRoundtrip(b2, unhex("80"), "80"); + verifyRoundtrip(b2, unhex("01"), "1"); + verifyRoundtrip(b2, unhex("F0"), "F0"); + verifyRoundtrip(b2, unhex("0F"), "F"); + verifyRoundtrip(b2, unhex("F00F"), "F00F"); + verifyRoundtrip(b2, unhex("5FAF"), "5FAF"); + } + + // Very likely genuinely the world's most inefficient binary conversion. + @Test + void codec_generalizes_down_to_base_2() { + var b2 = BaseNCodec.of("01"); + assertEquals(2, b2.base()); + verifyRoundtrip(b2, unhex(""), ""); + verifyRoundtrip(b2, unhex("00"), "0"); + verifyRoundtrip(b2, unhex("000000"), "000"); // note: prefix zero byte sentinels! + verifyRoundtrip(b2, unhex("80"), "10000000"); + verifyRoundtrip(b2, unhex("01"), "1"); + verifyRoundtrip(b2, unhex("F0"), "11110000"); + verifyRoundtrip(b2, unhex("0F"), "1111"); + } + +} diff --git a/vespalib/src/tests/coro/generator/.gitignore b/vespalib/src/tests/coro/generator/.gitignore new file mode 100644 index 00000000000..748003a81fe --- /dev/null +++ b/vespalib/src/tests/coro/generator/.gitignore @@ -0,0 +1 @@ +/vespalib_generator_bench_app diff --git a/vespalib/src/tests/coro/generator/CMakeLists.txt b/vespalib/src/tests/coro/generator/CMakeLists.txt index b4f59c69451..e2534274f7c 100644 --- a/vespalib/src/tests/coro/generator/CMakeLists.txt +++ b/vespalib/src/tests/coro/generator/CMakeLists.txt @@ -6,4 +6,11 @@ vespa_add_executable(vespalib_generator_test_app TEST vespalib GTest::GTest ) +vespa_add_executable(vespalib_generator_bench_app TEST + SOURCES + generator_bench.cpp + DEPENDS + vespalib + GTest::GTest +) vespa_add_test(NAME vespalib_generator_test_app COMMAND vespalib_generator_test_app) diff --git a/vespalib/src/tests/coro/generator/generator_bench.cpp b/vespalib/src/tests/coro/generator/generator_bench.cpp new file mode 100644 index 
00000000000..664132b7ba4 --- /dev/null +++ b/vespalib/src/tests/coro/generator/generator_bench.cpp @@ -0,0 +1,58 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/coro/generator.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <ranges> +#include <vector> + +using vespalib::coro::Generator; +using vespalib::BenchmarkTimer; + +std::vector<size_t> make_data() __attribute__((noinline)); +std::vector<size_t> make_data(size_t size) { + std::vector<size_t> data; + for (size_t i = 0; i < size; ++i) { + data.push_back(i); + } + return data; +} + +template <std::ranges::input_range T> +size_t calc_sum(T&& values) { + size_t sum = 0; + for (auto&& value: values) { + sum += value; + } + return sum; +} + +size_t calc_sum_direct(const std::vector<size_t> &values) { + return calc_sum(values); +} + +size_t calc_sum_wrapped(const std::vector<size_t> &values) { + return calc_sum([](const std::vector<size_t> &inner_values)->Generator<size_t> + { + for (auto&& value: inner_values) { + co_yield value; + } + }(values)); +} + +TEST(GeneratorBench, direct_vs_wrapped_vector_for_loop) { + std::vector<size_t> data = make_data(100000); + double direct_ms = BenchmarkTimer::benchmark([&data](){ + size_t sink = calc_sum_direct(data); + (void) sink; + }, 5.0) * 1000.0; + fprintf(stderr, "direct: %g ms\n", direct_ms); + double wrapped_ms = BenchmarkTimer::benchmark([&data](){ + size_t sink = calc_sum_wrapped(data); + (void) sink; + }, 5.0) * 1000.0; + fprintf(stderr, "wrapped: %g ms\n", wrapped_ms); + fprintf(stderr, "ratio: %g\n", (wrapped_ms/direct_ms)); +} + +GTEST_MAIN_RUN_ALL_TESTS() |