diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib/src/tests/sort |
Publish
Diffstat (limited to 'searchlib/src/tests/sort')
-rw-r--r-- | searchlib/src/tests/sort/.gitignore | 8 | ||||
-rw-r--r-- | searchlib/src/tests/sort/CMakeLists.txt | 22 | ||||
-rw-r--r-- | searchlib/src/tests/sort/DESC | 1 | ||||
-rw-r--r-- | searchlib/src/tests/sort/FILES | 1 | ||||
-rw-r--r-- | searchlib/src/tests/sort/javaorder.zh | 158 | ||||
-rw-r--r-- | searchlib/src/tests/sort/sort_test.cpp | 295 | ||||
-rw-r--r-- | searchlib/src/tests/sort/sortbenchmark.cpp | 115 | ||||
-rw-r--r-- | searchlib/src/tests/sort/uca.cpp | 121 |
8 files changed, 721 insertions, 0 deletions
diff --git a/searchlib/src/tests/sort/.gitignore b/searchlib/src/tests/sort/.gitignore new file mode 100644 index 00000000000..7207ff4596d --- /dev/null +++ b/searchlib/src/tests/sort/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +sort_test +uca_stress +/sortbenchmark +searchlib_sort_test_app +searchlib_sortbenchmark_app +searchlib_uca_stress_app diff --git a/searchlib/src/tests/sort/CMakeLists.txt b/searchlib/src/tests/sort/CMakeLists.txt new file mode 100644 index 00000000000..1830952bffd --- /dev/null +++ b/searchlib/src/tests/sort/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sortbenchmark_app + SOURCES + sortbenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sortbenchmark_app COMMAND searchlib_sortbenchmark_app BENCHMARK) +vespa_add_executable(searchlib_sort_test_app + SOURCES + sort_test.cpp + DEPENDS + searchlib +) +#vespa_add_test(NAME searchlib_sort_test_app COMMAND searchlib_sort_test_app) +vespa_add_executable(searchlib_uca_stress_app + SOURCES + uca.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_uca_stress_app COMMAND searchlib_uca_stress_app BENCHMARK) diff --git a/searchlib/src/tests/sort/DESC b/searchlib/src/tests/sort/DESC new file mode 100644 index 00000000000..ad8ab11f5ba --- /dev/null +++ b/searchlib/src/tests/sort/DESC @@ -0,0 +1 @@ +Testing templatized radixsort. 
diff --git a/searchlib/src/tests/sort/FILES b/searchlib/src/tests/sort/FILES new file mode 100644 index 00000000000..e2ef9d3c1ab --- /dev/null +++ b/searchlib/src/tests/sort/FILES @@ -0,0 +1 @@ +sort.cpp diff --git a/searchlib/src/tests/sort/javaorder.zh b/searchlib/src/tests/sort/javaorder.zh new file mode 100644 index 00000000000..0d29efc99bd --- /dev/null +++ b/searchlib/src/tests/sort/javaorder.zh @@ -0,0 +1,158 @@ + + + +30雜誌30雜誌 +asiatwnewsasiatwnews +AZ時尚旅遊AZ時尚旅遊 +bobo小天才養成誌bobo小天才養成誌 +Career職場情報誌Career職場情報誌 +CheersCheers雜誌 +EMBAEMBA雜誌 +ETtodayETtoday +FASHION QUEEN時尚女王FASHION QUEEN時尚女王 +iLOOKiLOOK電影雜誌 +men&#39;s uno男人誌men&#39;s uno男人誌 +Money 錢Money 錢 +NOWnewsNOWnews +NOWnews今日新聞網 +PAR表演藝術PAR表演藝術雜誌 +Press Association ImagesPress Association Images +Smart智富月刊Smart智富月刊 +Taipei WalkerTaipei Walker +TSNATSNA +TVBSTVBS +Yahoo! Taiwan Specials without layoutYahoo! Taiwan Specials without layout +Yahoo奇摩video.yahoo.com(勿用) +Yahoo奇摩Yahoo奇摩(爆新聞) +Yahoo奇摩Yahoo奇摩(新聞) +Yahoo奇摩Yahoo奇摩(影音) +Yahoo奇摩新聞Yahoo奇摩新聞(報氣象) +YourNewsYourNews +Y特別企畫Y特別企畫 +愛爾達愛爾達 +愛爾達愛爾達電視 +財訊快報財訊快報 +財訊快報財訊快報季刊 +財訊快報季刊財訊快報季刊 +財訊快報季刊財訊快報季刊 +財訊雙週刊財訊雙週刊 +常春月刊常春月刊 +朝鮮日報朝鮮日報 +達志達志 +達志達志 +達志達志 +大家健康雜誌大家健康雜誌 +大師輕鬆讀大師輕鬆讀 +大台灣旅遊網大台灣旅遊網 +東森新聞東森新聞 +東森新聞東森新聞 +東星東星 +俄羅斯新聞網俄羅斯新聞網 +法新社法新社 +非凡新聞非凡新聞 +非凡新聞節目非凡新聞節目 +富爾特消費新聞富爾特消費新聞 +公共電視公共電視 +公視公視 +古美術古美術 +管理雜誌管理雜誌 +光華雜誌台灣光華雜誌 +廣編特輯廣編特輯 +廣告雜誌廣告雜誌 +國際商情雙周刊國際商情雙周刊 +哈佛商業評論哈佛商業評論 +韓國朝鮮日報韓國朝鮮日報 +韓國中央日報韓國中央日報 +韓國中央日報韓國中央日報 +韓星網韓星網 +韓星網韓星網 +華人健康網華人健康網 +華視華視 +華視華視 +華視華視 +環境資訊中心環境資訊中心 +健康醫療網健康醫療網 +健康醫療網健康醫療網 +講義雜誌講義雜誌 +教育廣播電台國立教育廣播電台 +今藝術今藝術 +今周刊今周刊 +今周刊今周刊 +經理人經理人月刊 +鉅亨網鉅亨網 +軍聞社軍聞社 +卡優新聞網卡優新聞網 +康健雜誌康健雜誌 +科學人科學人雜誌 +客家電視客家電視台 +酷搜圖聞酷搜圖聞 +理財周刊理財周刊 +麗台運動報麗台運動報 +聯合文學聯合文學 +聯合新聞網聯合新聞網 +路透社路透社 +路透社路透社 +旅遊經旅遊經 +羅開Golf 頻道羅開Golf 頻道 +媽媽寶寶媽媽寶寶 +美麗佳人美麗佳人雜誌 +美聯社美聯社 +美通社美通社 +民視民視 +民視民視 +明報周刊明報周刊 +男人幫男人幫 +能力雜誌能力雜誌 +年代新聞年代新聞 +年代新聞年代新聞 +年代新聞年代新聞 +年代新聞年代新聞 +紐約時報中文網.紐約時報中文網. +紐約時報中文網.紐約時報中文網. 
+紐約時報中文網紐約時報中文網 +儂儂雜誌儂儂雜誌 +蓬勃網球蓬勃網球 +蘋果日報蘋果日報 +親子天下親子天下 +全國廣播全國廣播 +全球中央全球中央雜誌 +商業周刊商業周刊 +數位家庭數位家庭 +數位時代數位時代 +台灣立報台灣立報 +台灣新生報台灣新生報 +台灣醒報台灣醒報 +台灣醒報台灣醒報 +臺灣時報臺灣時報 +天下雜誌天下雜誌 +統一獅 Video統一獅 Video +玩高爾夫玩高爾夫 +旺報旺報 +先探投資週刊先探投資週刊 +現代保險健康理財雜誌現代保險健康理財雜誌 +香港中文大學EMBA Videos香港中文大學EMBA +新頭殼新頭殼 +新新聞周刊新新聞周刊 +兄弟象 Video兄弟象 Video +訊息快遞Yahoo 奇摩新聞訊息快遞 +野球人野球人 +壹電視壹電視 +壹電視壹電視 +壹蘋果壹蘋果 +義大犀牛 Video義大犀牛 Video +優活健康網優活健康網 +原視原視 +遠見雜誌遠見雜誌 +張老師月刊張老師月刊 +整形達人整形達人 +中廣中廣新聞網 +中華日報中華日報 +中華職棒中華職棒 +中時電子報中時電子報 +中央廣播電台中央廣播電台 +中央日報中央日報 +中央社中央社 +中央社中央社 +自立晚報自立晚報 +自由時報自由時報 +自由時報自由時報 diff --git a/searchlib/src/tests/sort/sort_test.cpp b/searchlib/src/tests/sort/sort_test.cpp new file mode 100644 index 00000000000..cf5e1a1cb1f --- /dev/null +++ b/searchlib/src/tests/sort/sort_test.cpp @@ -0,0 +1,295 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/sort.h> +#include <vespa/searchlib/common/sortspec.h> +#include <vespa/searchlib/common/converters.h> +#include <vespa/vespalib/util/array.h> +#include <vector> +#include <fstream> +#include <iostream> +#include <iomanip> +#include <stdexcept> +#include <unicode/ustring.h> + +LOG_SETUP("sort_test"); + +using vespalib::Array; +using namespace search::common; +using vespalib::ConstBufferRef; + +class Test : public vespalib::TestApp +{ +public: + int Main(); + void testUnsignedIntegerSort(); + template <typename T> + void testSignedIntegerSort(); + void testStringSort(); + void testIcu(); + void testStringCaseInsensitiveSort(); + void testSortSpec(); + void testSameAsJavaOrder(); +}; + +struct LoadedStrings +{ + LoadedStrings(const char * v=NULL) : _value(v), _currRadix(_value) { } + + class ValueRadix + { + public: + char operator () (LoadedStrings & x) const { + unsigned char c(*x._currRadix); + if (c) { + x._currRadix++; + } + return c; + } + }; + + class
ValueCompare : public std::binary_function<LoadedStrings, LoadedStrings, bool> { + public: + bool operator() (const LoadedStrings & x, const LoadedStrings & y) const { + return strcmp(x._value, y._value) < 0; + } + }; + const char * _value; + const char * _currRadix; +}; + +void Test::testIcu() +{ + { + const std::string src("Creation of Bob2007 this is atumated string\this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string; _ 12345567890-=,./;'[;"); + std::vector<uint16_t> u16Buffer(100); + UErrorCode status = U_ZERO_ERROR; + int32_t u16Wanted(0); + u_strFromUTF8(&u16Buffer[0], u16Buffer.size(), &u16Wanted, src.c_str(), -1, &status); + ASSERT_TRUE(U_SUCCESS(status) || (status == U_INVALID_CHAR_FOUND) || ((status == U_BUFFER_OVERFLOW_ERROR) && (u16Wanted > (int)u16Buffer.size()))); + } +} + +void Test::testUnsignedIntegerSort() +{ + search::NumericRadixSorter<uint32_t, true> S; + S(NULL, 0); + + Array<uint32_t> array1(1); + array1[0] = 1567; + S(&array1[0], 1); + ASSERT_TRUE(array1[0] == 1567); + + unsigned int N(0x100000); + Array<uint32_t> array(N); + unsigned seed(1); + for(size_t i(0); i < N; i++) { + array[i] = rand_r(&seed); + } + S(&array[0], N); + for (size_t i(1); i < N; i++) { + ASSERT_TRUE(array[i] >= array[i-1]); + } +} + +template<typename T> +class IntOrder { +public: +
uint64_t operator () (T v) const { return v ^ (std::numeric_limits<T>::max() + 1); } +}; + +template <typename T> +void Test::testSignedIntegerSort() +{ + search::NumericRadixSorter<T, true> S; + S(NULL, 0); + + Array<T> array1(1); + array1[0] = 1567; + S(&array1[0], 1); + ASSERT_TRUE(array1[0] == 1567); + + unsigned int N(0x100000); + Array<T> array(N); + unsigned seed(1); + for(size_t i(0); i < N; i++) { + T v = rand_r(&seed); + array[i] = (i%2) ? v : -v; + } + S(&array[0], N); + for (size_t i(1); i < N; i++) { + ASSERT_TRUE(array[i] >= array[i-1]); + } +} + +void Test::testStringSort() +{ + Array<LoadedStrings> array1(1); + + unsigned int N(0x1000); + Array<LoadedStrings> loaded(N); + std::vector<uint32_t> radixScratchPad(N); + search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, static_cast<LoadedStrings *>(NULL), 0, &radixScratchPad[0], 0); + + array1[0] = LoadedStrings("a"); + search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &array1[0], 1, &radixScratchPad[0], 0); + ASSERT_TRUE(strcmp(array1[0]._value, "a") == 0); + + loaded[0] = LoadedStrings("a"); + for(size_t i(1); i < N; i++) { + loaded[i] = LoadedStrings(""); + } + + search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &loaded[0], N, &radixScratchPad[0], 0); + LoadedStrings::ValueCompare vc; + for(size_t i(1); i < N; i++) { + ASSERT_TRUE( ! vc(loaded[i], loaded[i-1])); + } +} + +void Test::testStringCaseInsensitiveSort() +{ +} + +void Test::testSortSpec() +{ + { + SortSpec sortspec("-name"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( !
sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() == NULL); + } + + { + SortSpec sortspec("-lowercase(name)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<LowercaseConverter *>(sortspec[0]._converter.get()) != NULL); + } + + { + SortSpec sortspec("-uca(name,nn_no)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,PRIMARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,SECONDARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,TERTIARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,QUATERNARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( !
sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,IDENTICAL)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,zh)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,finnes_ikke)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + try { + SortSpec sortspec("-uca(name,nn_no,NTERTIARY)"); + EXPECT_TRUE(false); + } catch (const std::runtime_error & e) { + EXPECT_TRUE(true); + EXPECT_TRUE(strcmp(e.what(), "Illegal uca collation strength : NTERTIARY") == 0); + } + } +} + +void Test::testSameAsJavaOrder() +{ + std::vector<vespalib::string> javaOrder; + std::ifstream is("javaorder.zh"); + // Drive the loop from the stream state instead of eof(): when the file is missing or + // unreadable only failbit is set, so an eof()-based loop would never terminate. + for (std::string line; std::getline(is, line); ) { + javaOrder.push_back(line); + } + EXPECT_EQUAL(158u, javaOrder.size()); + // javaOrder[0] is read right below; stop here rather than index an empty vector. + ASSERT_TRUE( ! 
javaOrder.empty()); + search::common::UcaConverter uca("zh", "PRIMARY"); + vespalib::ConstBufferRef fkey = uca.convert(vespalib::ConstBufferRef(javaOrder[0].c_str(), javaOrder[0].size())); + vespalib::string prev(fkey.c_str(), fkey.size()); + for (size_t i(1); i < javaOrder.size(); i++) { + vespalib::ConstBufferRef key = uca.convert(vespalib::ConstBufferRef(javaOrder[i].c_str(),
javaOrder[i].size())); + vespalib::HexDump dump(key.c_str(), key.size()); + vespalib::string current(key.c_str(), key.size()); + UErrorCode status(U_ZERO_ERROR); + UCollationResult cr = uca.getCollator().compareUTF8(javaOrder[i-1].c_str(), javaOrder[i].c_str(), status); + std::cout << std::setw(3) << i << ": " << status << "(" << u_errorName(status) << ") - " << cr << " '" << dump << "' : '" << javaOrder[i] << "'" << std::endl; + EXPECT_TRUE(prev <= current); + EXPECT_TRUE(U_SUCCESS(status)); + EXPECT_TRUE(cr == UCOL_LESS || cr == UCOL_EQUAL); + prev = current; + } +} + + +TEST_APPHOOK(Test); + +int Test::Main() +{ + TEST_INIT("sort_test"); + + testUnsignedIntegerSort(); + testSignedIntegerSort<int32_t>(); + testSignedIntegerSort<int64_t>(); + testStringSort(); + testStringCaseInsensitiveSort(); + testSortSpec(); + testIcu(); + testSameAsJavaOrder(); + + TEST_DONE(); +} diff --git a/searchlib/src/tests/sort/sortbenchmark.cpp b/searchlib/src/tests/sort/sortbenchmark.cpp new file mode 100644 index 00000000000..1309cf57d5d --- /dev/null +++ b/searchlib/src/tests/sort/sortbenchmark.cpp @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/sort.h> +#include <vespa/vespalib/util/array.h> +#include <vector> + +LOG_SETUP("sort_test"); + +using vespalib::Array; +using vespalib::ConstBufferRef; + +class Test : public vespalib::TestApp +{ +public: + typedef std::vector<uint32_t> V; + std::vector< std::vector<uint32_t> > _data; + int Main(); + void generateVectors(size_t numVectors, size_t values); + V merge(); + void twoWayMerge(); + V cat() const; +}; + +void Test::generateVectors(size_t numVectors, size_t values) +{ + _data.resize(numVectors); + for (size_t j(0); j < numVectors; j++) { + V & v(_data[j]); + v.resize(values); + for (size_t i(0); i < values; i++) { + v[i] = i; + } + } +} + +Test::V Test::merge() +{ + twoWayMerge(); + return _data[0]; +} + +void Test::twoWayMerge() +{ + std::vector<V> n((_data.size()+1)/2); + + for ( size_t i(0), m(_data.size()/2); i < m; i++) { + const V & a = _data[i*2 + 0]; + const V & b = _data[i*2 + 1]; + n[i].resize(a.size() + b.size()); + std::merge(a.begin(), a.end(), b.begin(), b.end(), n[i].begin()); + } + if (_data.size()%2) { + n[n.size()-1].swap(_data[_data.size() - 1]); + } + _data.swap(n); + if (_data.size() > 1) { + twoWayMerge(); + } +} + +Test::V Test::cat() const +{ + size_t sum(0); + for (size_t i(0), m(_data.size()); i < m; i++) { + sum += _data[i].size(); + } + V c; + c.reserve(sum); + for (size_t i(0), m(_data.size()); i < m; i++) { + const V & v(_data[i]); + c.insert(c.end(), v.begin(), v.end()); + } + + return c; +} + +TEST_APPHOOK(Test); + +int Test::Main() +{ + TEST_INIT("sortbenchmark"); + size_t numVectors(11); + size_t values(10000000); + vespalib::string type("radix"); + if (_argc > 1) { + values = strtol(_argv[1], NULL, 0); + if (_argc > 2) { + numVectors = strtol(_argv[2], NULL, 0); + if (_argc > 3) { // _argv[3] is only valid when a third argument was supplied + type = _argv[3]; + } + } + } + + printf("Start with %zu vectors with %zu values and type '%s'(radix,
qsort, merge)\n", numVectors, values, type.c_str()); + generateVectors(numVectors, values); + printf("Start cat\n"); + V v = cat(); + printf("Cat %zu values\n", v.size()); + if (type == "merge") { + V m = merge(); + printf("Merged %zu values\n", m.size()); + } else if (type == "qsort") { + std::sort(v.begin(), v.end()); + printf("sorted %zu value with std::sort\n", v.size()); + } else { + search::NumericRadixSorter<uint32_t, true> S; + S(&v[0], v.size()); + printf("sorted %zu value with radix::sort\n", v.size()); + } + + TEST_DONE(); +} diff --git a/searchlib/src/tests/sort/uca.cpp b/searchlib/src/tests/sort/uca.cpp new file mode 100644 index 00000000000..b9225c94a66 --- /dev/null +++ b/searchlib/src/tests/sort/uca.cpp @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/sort.h> +#include <vespa/searchlib/common/sortspec.h> +#include <vespa/searchlib/common/converters.h> +#include <vespa/vespalib/util/array.h> +#include <memory> +#include <string> +#include <vector> +#include <stdexcept> +#include <unicode/ustring.h> + +LOG_SETUP("uca_stress"); + +using icu::Collator; + +class Test : public vespalib::TestApp +{ +public: + int Main(); + void testFromDat(); +}; + + +void Test::testFromDat() +{ + size_t badnesses = 0; + + std::string startMark("abc"); + std::string midMark("def"); + std::string endMark("ghi"); + + UErrorCode status = U_ZERO_ERROR; + auto coll = std::unique_ptr<Collator>(Collator::createInstance(icu::Locale("en"), status)); + + coll->setStrength(Collator::PRIMARY); + + std::vector<uint16_t> u16buffer(100); + std::vector<uint8_t> u8buffer(10); + + int fd = open("sort-blobs.dat", O_RDONLY); + char sbuf[4]; + + int num=0; + + uint32_t atleast = 0; + + while (read(fd, sbuf, 4) == 4) { + if (startMark == sbuf) { + uint32_t len = 0; + int r
= read(fd, &len, 4); + + EXPECT_EQUAL(4, r); + r = read(fd, sbuf, 4); + EXPECT_EQUAL(4, r); + EXPECT_EQUAL(midMark, sbuf); + + if (u16buffer.size() < len) { + u16buffer.resize(len); + } + r = read(fd, &u16buffer[0], len*2); + EXPECT_EQUAL((int)len*2, r); + + r = read(fd, sbuf, 4); + EXPECT_EQUAL(4, r); + EXPECT_EQUAL(endMark, sbuf); + + uint32_t wanted = coll->getSortKey(&u16buffer[0], len, NULL, 0); + + EXPECT_TRUE(wanted > 0); + EXPECT_TRUE(wanted >= len); + EXPECT_TRUE(wanted < len*6); + + if (wanted + 20 > u8buffer.size()) { + u8buffer.resize(wanted+20); + } + + for (uint32_t pretend = 1; pretend < wanted+8; ++pretend) { + memset(&u8buffer[0], 0x99, u8buffer.size()); + uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], pretend); + EXPECT_EQUAL(wanted, got); + + if (u8buffer[pretend+1] != 0x99) { + printf("wrote 2 bytes too far: wanted space %d, pretend allocated %d, last good=%02x, bad=%02x %02x\n", + wanted, pretend, u8buffer[pretend-1], + u8buffer[pretend], u8buffer[pretend+1]); + } else if (u8buffer[pretend] != 0x99) { + ++badnesses; + if (wanted > atleast) { + atleast = wanted; + printf("wrote 1 byte too far: wanted space %d, pretend allocated %d, last good=%02x, bad=%02x\n", + wanted, pretend, u8buffer[pretend-1], u8buffer[pretend]); + } + } + } + + memset(&u8buffer[0], 0x99, u8buffer.size()); + uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], u8buffer.size()); + EXPECT_EQUAL(wanted, got); + + EXPECT_EQUAL('\0', u8buffer[got-1]); + EXPECT_EQUAL((uint8_t)0x99, u8buffer[got]); + } + if (++num >= 10000) { + TEST_FLUSH(); + num=0; + } + } + EXPECT_EQUAL(0u, badnesses); +} + +TEST_APPHOOK(Test); + +int Test::Main() +{ + TEST_INIT("uca_stress"); + testFromDat(); + TEST_DONE(); +} |