diff options
Diffstat (limited to 'searchlib/src/tests/bitcompression')
5 files changed, 632 insertions, 0 deletions
diff --git a/searchlib/src/tests/bitcompression/expgolomb/.gitignore b/searchlib/src/tests/bitcompression/expgolomb/.gitignore new file mode 100644 index 00000000000..5ba0f36a2f0 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/.gitignore @@ -0,0 +1 @@ +searchlib_expgolomb_test_app diff --git a/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt new file mode 100644 index 00000000000..f724773dfd6 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_expgolomb_test_app + SOURCES + expgolomb_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_expgolomb_test_app NO_VALGRIND COMMAND searchlib_expgolomb_test_app) diff --git a/searchlib/src/tests/bitcompression/expgolomb/DESC b/searchlib/src/tests/bitcompression/expgolomb/DESC new file mode 100644 index 00000000000..4abef0ecf24 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/DESC @@ -0,0 +1 @@ +Exp golomb encoding / decoding test. Take a look at expgolomb_test.cpp for details. diff --git a/searchlib/src/tests/bitcompression/expgolomb/FILES b/searchlib/src/tests/bitcompression/expgolomb/FILES new file mode 100644 index 00000000000..dbc3fa5e527 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/FILES @@ -0,0 +1 @@ +expgolomb_test.cpp diff --git a/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp b/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp new file mode 100644 index 00000000000..dcf0f69ee55 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp @@ -0,0 +1,621 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("expglomb_test"); +#include <vespa/searchlib/bitcompression/compression.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vector> + +using search::bitcompression::DecodeContext64; +using search::bitcompression::DecodeContext64Base; +using search::bitcompression::EncodeContext64; +using search::bitcompression::EncodeContext64Base; + +template <bool bigEndian> +class DecodeContext : public DecodeContext64<bigEndian> +{ +public: + using Parent = DecodeContext64<bigEndian>; + using Parent::defineReadOffset; + using EC = EncodeContext64<bigEndian>; + + DecodeContext(const uint64_t *compr, int bitOffset) + : DecodeContext64<bigEndian>(compr, bitOffset) + { + this->defineReadOffset(0); + } +}; + + +class IDecodeFunc +{ +public: + virtual uint64_t decode() = 0; + virtual void skip() = 0; + virtual uint64_t decodeSmall() = 0; + virtual uint64_t decodeSmallApply() = 0; + virtual void skipSmall() = 0; + + virtual ~IDecodeFunc() { } + +}; + + +/* + * Exp golomb decode functions getting kValue from a variable, i.e. + * compiler is not allowed to generate shift instructions with immediate values. + * Expressions involving kValue are not constant and can thus not be + * folded to constant values. + */ +template <bool bigEndian> +class DecodeExpGolombVarK : public IDecodeFunc +{ +public: + using DCB = DecodeContext64Base; + using DC = DecodeContext<bigEndian>; + using EC = typename DC::EC; + + DCB &_dc; + int _kValue; + + DecodeExpGolombVarK(DCB &dc, int kValue) + : _dc(dc), + _kValue(kValue) + { + } + + virtual uint64_t decode() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + return val64; + } + + virtual void skip() + { + unsigned int length; + UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + } + + virtual uint64_t decodeSmall() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + return val64; + } + + virtual uint64_t decodeSmallApply() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC, val64 =); + return val64; + } + + virtual void skipSmall() + { + unsigned int length; + UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + } + + static std::unique_ptr<IDecodeFunc> + make(DCB &dc, int kValue) + { + return std::unique_ptr<IDecodeFunc> + (new DecodeExpGolombVarK<bigEndian>(dc, kValue)); + } +}; + + +/* + * Exp golomb decode functions getting kValue from a template argument + * i.e. compiler is allowed to generate shift instructions with + * immediate values and fold constant expressions involving kValue. + */ +template <bool bigEndian, int kValue> +class DecodeExpGolombConstK : public IDecodeFunc +{ +public: + using DCB = DecodeContext64Base; + using DC = DecodeContext<bigEndian>; + using EC = typename DC::EC; + + DCB &_dc; + + DecodeExpGolombConstK(DCB &dc) + : _dc(dc) + { + } + + virtual uint64_t decode() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + return val64; + } + + virtual void skip() + { + unsigned int length; + UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + } + + virtual uint64_t decodeSmall() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + return val64; + } + + virtual uint64_t decodeSmallApply() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC, val64 =); + return val64; + } + + virtual void skipSmall() + { + unsigned int length; + UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + } + + static std::unique_ptr<IDecodeFunc> + make(DCB &dc, int) + { + return std::unique_ptr<IDecodeFunc> + (new DecodeExpGolombConstK<bigEndian, kValue>(dc)); + } +}; + + +using IDecodeFuncFactory = + std::unique_ptr<IDecodeFunc> (*)(DecodeContext64Base &dc, int kValue); + + +template <bool bigEndian> +class DecodeFuncFactories +{ +public: + using IDF = IDecodeFuncFactory; + std::vector<IDF> _constK; + IDF _varK; + +public: + DecodeFuncFactories(); + + void + addConstKFactory(int kValue, IDecodeFuncFactory factory) + { + assert(static_cast<unsigned int>(kValue) == _constK.size()); + _constK.push_back(factory); + } + + IDecodeFuncFactory + getConstKFactory(int kValue) const + { + assert(kValue >= 0 && + static_cast<unsigned int>(kValue) < _constK.size()); + return _constK[kValue]; + } + + IDecodeFuncFactory + getVarKFactory() const + { + return _varK; + } +}; + + +template <bool bigEndian> +struct RegisterFactoryPtr; + + +template <bool bigEndian> +using RegisterFactory = void (*)(DecodeFuncFactories<bigEndian> &factories, + RegisterFactoryPtr<bigEndian> &ptr); + + +template <bool bigEndian> +struct RegisterFactoryPtr +{ + RegisterFactory<bigEndian> _ptr; + + RegisterFactoryPtr(RegisterFactory<bigEndian> ptr) + : _ptr(ptr) + { + } +}; + + +template <bool bigEndian, int kValue> +class RegisterFactories +{ +public: + static void registerFactory(DecodeFuncFactories<bigEndian> &factories, + RegisterFactoryPtr<bigEndian> &ptr) + { + factories.addConstKFactory(kValue, + &DecodeExpGolombConstK<bigEndian, kValue>:: + make); + ptr._ptr = &RegisterFactories<bigEndian, kValue+1>::registerFactory; + } +}; + + +template <bool bigEndian> +class RegisterFactories<bigEndian, 64> +{ +public: + static void registerFactory(DecodeFuncFactories<bigEndian> &factories, + RegisterFactoryPtr<bigEndian> &ptr) + { + (void) factories; + ptr._ptr = nullptr; + } +}; + + +template <bool bigEndian> +DecodeFuncFactories<bigEndian>::DecodeFuncFactories() + : _constK(), + _varK(&DecodeExpGolombVarK<bigEndian>::make) +{ + RegisterFactoryPtr<bigEndian> f( + &RegisterFactories<bigEndian, 0>::registerFactory); + while (f._ptr) { + (*f._ptr)(*this, f); + } +} + + +class TestFixtureBase +{ +public: + std::vector<uint64_t> _randNums; + using EC = EncodeContext64Base; + + void fillRandNums(); + + void + calcBoundaries(int kValue, bool small, std::vector<uint64_t> &v); + + void + testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + DecodeContext64Base &dcApply, + IDecodeFunc &df, + IDecodeFunc &dfSkip, + IDecodeFunc &dfApply); + + void + testRandNums(DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + IDecodeFunc &df, + IDecodeFunc &dfSkip); +}; + + +void +TestFixtureBase::fillRandNums() +{ + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + _randNums.push_back(rval); + } + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + uint32_t bits = (rand() & 63); + rval &= ((UINT64_C(1) << bits) - 1); + _randNums.push_back(rval); + } +} + + +namespace +{ + +/* + * Add values around a calculated boundary, to catch off by one errors. + */ +void +addBoundary(uint64_t boundary, uint64_t maxVal, std::vector<uint64_t> &v) +{ + uint64_t low = boundary > 2u ? boundary - 2 : 0; + uint64_t high = maxVal - 2u < boundary ? maxVal : boundary + 2; + assert(low <= high); + LOG(info, "low=0x%lx, high=0x%lx", low, high); + uint64_t i = low; + for (;;) { + v.push_back(i); + if (i == high) + break; + ++i; + } +} + +} + +void +TestFixtureBase::calcBoundaries(int kValue, bool small, + std::vector<uint64_t> &v) +{ + const char *smallStr = small ? "small" : "not small"; + v.push_back(0); + uint64_t maxVal = EC::maxExpGolombVal(kValue); // encode method limit + if (small) { + maxVal = EC::maxExpGolombVal(kValue, 64); + } + LOG(debug, "kValue=%u, %s, maxVal is 0x%lx", kValue, smallStr, maxVal); + for (int bits = kValue + 1; + bits + kValue <= 128 && (bits <= 64 || !small); + ++bits) { + uint64_t boundary = EC::maxExpGolombVal(kValue, bits); + if (bits + kValue == 128) { + LOG(debug, + "boundary for kValue=%d, %s, bits=%d: 0x%lx", + kValue, smallStr, bits, boundary); + } + addBoundary(boundary, maxVal, v); + } + std::sort(v.begin(), v.end()); + auto ve = std::unique(v.begin(), v.end()); + uint32_t oldSize = v.size(); + v.resize(ve - v.begin()); + uint32_t newSize = v.size(); + LOG(debug, + "kValues=%u, %s, boundaries %u -> %u, maxVal=0x%lx, highest=0x%lx", + kValue, smallStr, oldSize, newSize, maxVal, v.back()); +} + + +void +TestFixtureBase::testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + DecodeContext64Base &dcApply, + IDecodeFunc &df, + IDecodeFunc &dfSkip, + IDecodeFunc &dfApply) +{ + uint32_t bits = 0; + uint64_t maxSame = 0; + + for (auto num : v) { + uint64_t prevPos = dc.getReadOffset(); + uint64_t val64 = small ? df.decodeSmall() : df.decode(); + EXPECT_EQUAL(num, val64); + uint64_t currPos = dc.getReadOffset(); + if (small) { + dfSkip.skipSmall(); + } else { + dfSkip.skip(); + } + EXPECT_EQUAL(currPos, dcSkip.getReadOffset()); + if (small) { + uint64_t sval64 = dfApply.decodeSmallApply(); + EXPECT_EQUAL(num, sval64); + EXPECT_EQUAL(currPos, dcApply.getReadOffset()); + } + if (num == 0) { + bits = currPos - prevPos; + maxSame = EC::maxExpGolombVal(kValue, bits); + } else { + assert(bits <= currPos - prevPos); + if (bits < currPos - prevPos) { + ASSERT_EQUAL(bits + 2, currPos - prevPos); + bits += 2; + ASSERT_EQUAL(maxSame + 1, num); + maxSame = EC::maxExpGolombVal(kValue, bits); + } + } + } +} + + +void +TestFixtureBase::testRandNums(DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + IDecodeFunc &df, + IDecodeFunc &dfSkip) +{ + for (auto num : _randNums) { + uint64_t val64 = df.decode(); + EXPECT_EQUAL(num, val64); + uint64_t currPos = dc.getReadOffset(); + dfSkip.skip(); + EXPECT_EQUAL(currPos, dcSkip.getReadOffset()); + } +} + + + +template <bool bigEndian> +class TestFixture : public TestFixtureBase +{ +public: + DecodeFuncFactories<bigEndian> _factories; + using DC = DecodeContext<bigEndian>; + using EC = typename DC::EC; + using Parent = TestFixtureBase; + using Parent::testBoundaries; + using Parent::testRandNums; + + TestFixture() + : TestFixtureBase(), + _factories() + { + fillRandNums(); + } + + void + testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc); + void + testBoundaries(int kValue, bool small, std::vector<uint64_t> &v); + + void + testBoundaries(); + + void + testRandNums(int kValue, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc); + + void + testRandNums(int kValue); + + void + testRandNums(); +}; + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc) +{ + DC dc(static_cast<const uint64_t *>(wc._comprBuf), 0); + DC dcSkip(static_cast<const uint64_t *>(wc._comprBuf), 0); + DC dcApply(static_cast<const uint64_t *>(wc._comprBuf), 0); + std::unique_ptr<IDecodeFunc> df((*f)(dc, kValue)); + std::unique_ptr<IDecodeFunc> dfSkip((*f)(dcSkip, kValue)); + std::unique_ptr<IDecodeFunc> dfApply((*f)(dcApply, kValue)); + testBoundaries(kValue, small, v, dc, dcSkip, dcApply, + *df, *dfSkip, *dfApply); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v) +{ + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + for (auto num : v) { + e.encodeExpGolomb(num, kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + IDecodeFuncFactory f = _factories.getConstKFactory(kValue); + testBoundaries(kValue, small, v, f, wc); + f = _factories.getVarKFactory(); + testBoundaries(kValue, small, v, f, wc); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testBoundaries() +{ + for (int kValue = 0; kValue < 64; ++kValue) { + std::vector<uint64_t> v; + calcBoundaries(kValue, false, v); + testBoundaries(kValue, false, v); + /* + * Note: We don't support kValue being 63 for when decoding + * "small" numbers (limited to 64 bits in encoded form) since + * performance penalty is not worth the extra flexibility. + */ + if (kValue < 63) { + v.clear(); + calcBoundaries(kValue, true, v); + testBoundaries(kValue, true, v); + } + } +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testRandNums(int kValue, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc) +{ + DC dc(static_cast<const uint64_t *>(wc._comprBuf), 0); + DC dcSkip(static_cast<const uint64_t *>(wc._comprBuf), 0); + std::unique_ptr<IDecodeFunc> df((*f)(dc, kValue)); + std::unique_ptr<IDecodeFunc> dfSkip((*f)(dcSkip, kValue)); + testRandNums(dc, dcSkip, *df, *dfSkip); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testRandNums(int kValue) +{ + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + for (auto num : _randNums) { + e.encodeExpGolomb(num, kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + IDecodeFuncFactory f = _factories.getConstKFactory(kValue); + testRandNums(kValue, f, wc); + f = _factories.getVarKFactory(); + testRandNums(kValue, f, wc); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testRandNums() +{ + for (int k = 0; k < 64; ++k) { + testRandNums(k); + } +} + + +TEST_F("Test bigendian expgolomb encoding/decoding", TestFixture<true>) +{ + f.testRandNums(); + f.testBoundaries(); +} + + +TEST_F("Test little expgolomb encoding/decoding", TestFixture<false>) +{ + f.testRandNums(); + f.testBoundaries(); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } |