summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src/tests/sort
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@yahoo-inc.com>, 2016-06-15 23:09:44 +0200
committer: Jon Bratseth <bratseth@yahoo-inc.com>, 2016-06-15 23:09:44 +0200
commit: 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree: 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib/src/tests/sort
Publish
Diffstat (limited to 'searchlib/src/tests/sort')
-rw-r--r-- searchlib/src/tests/sort/.gitignore 8
-rw-r--r-- searchlib/src/tests/sort/CMakeLists.txt 22
-rw-r--r-- searchlib/src/tests/sort/DESC 1
-rw-r--r-- searchlib/src/tests/sort/FILES 1
-rw-r--r-- searchlib/src/tests/sort/javaorder.zh 158
-rw-r--r-- searchlib/src/tests/sort/sort_test.cpp 295
-rw-r--r-- searchlib/src/tests/sort/sortbenchmark.cpp 115
-rw-r--r-- searchlib/src/tests/sort/uca.cpp 121
8 files changed, 721 insertions, 0 deletions
diff --git a/searchlib/src/tests/sort/.gitignore b/searchlib/src/tests/sort/.gitignore
new file mode 100644
index 00000000000..7207ff4596d
--- /dev/null
+++ b/searchlib/src/tests/sort/.gitignore
@@ -0,0 +1,8 @@
+.depend
+Makefile
+sort_test
+uca_stress
+/sortbenchmark
+searchlib_sort_test_app
+searchlib_sortbenchmark_app
+searchlib_uca_stress_app
diff --git a/searchlib/src/tests/sort/CMakeLists.txt b/searchlib/src/tests/sort/CMakeLists.txt
new file mode 100644
index 00000000000..1830952bffd
--- /dev/null
+++ b/searchlib/src/tests/sort/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Executable built from sortbenchmark.cpp, linked against searchlib.
+vespa_add_executable(searchlib_sortbenchmark_app
+ SOURCES
+ sortbenchmark.cpp
+ DEPENDS
+ searchlib
+)
+# Registered with the BENCHMARK keyword.
+vespa_add_test(NAME searchlib_sortbenchmark_app COMMAND searchlib_sortbenchmark_app BENCHMARK)
+# Executable built from sort_test.cpp, linked against searchlib.
+vespa_add_executable(searchlib_sort_test_app
+ SOURCES
+ sort_test.cpp
+ DEPENDS
+ searchlib
+)
+# NOTE(review): the registration below is commented out, so searchlib_sort_test_app is
+# built but not added as a test here. The reason is not recorded - confirm before re-enabling.
+#vespa_add_test(NAME searchlib_sort_test_app COMMAND searchlib_sort_test_app)
+# Executable built from uca.cpp, linked against searchlib.
+vespa_add_executable(searchlib_uca_stress_app
+ SOURCES
+ uca.cpp
+ DEPENDS
+ searchlib
+)
+# Registered with the BENCHMARK keyword.
+vespa_add_test(NAME searchlib_uca_stress_app COMMAND searchlib_uca_stress_app BENCHMARK)
diff --git a/searchlib/src/tests/sort/DESC b/searchlib/src/tests/sort/DESC
new file mode 100644
index 00000000000..ad8ab11f5ba
--- /dev/null
+++ b/searchlib/src/tests/sort/DESC
@@ -0,0 +1 @@
+Testing templatized radixsort.
diff --git a/searchlib/src/tests/sort/FILES b/searchlib/src/tests/sort/FILES
new file mode 100644
index 00000000000..e2ef9d3c1ab
--- /dev/null
+++ b/searchlib/src/tests/sort/FILES
@@ -0,0 +1 @@
+sort.cpp
diff --git a/searchlib/src/tests/sort/javaorder.zh b/searchlib/src/tests/sort/javaorder.zh
new file mode 100644
index 00000000000..0d29efc99bd
--- /dev/null
+++ b/searchlib/src/tests/sort/javaorder.zh
@@ -0,0 +1,158 @@
+
+
+
+30雜誌30雜誌
+asiatwnewsasiatwnews
+AZ時尚旅遊AZ時尚旅遊
+bobo小天才養成誌bobo小天才養成誌
+Career職場情報誌Career職場情報誌
+CheersCheers雜誌
+EMBAEMBA雜誌
+ETtodayETtoday
+FASHION QUEEN時尚女王FASHION QUEEN時尚女王
+iLOOKiLOOK電影雜誌
+men&amp;#39;s uno男人誌men&amp;#39;s uno男人誌
+Money 錢Money 錢
+NOWnewsNOWnews
+NOWnews今日新聞網
+PAR表演藝術PAR表演藝術雜誌
+Press Association ImagesPress Association Images
+Smart智富月刊Smart智富月刊
+Taipei WalkerTaipei Walker
+TSNATSNA
+TVBSTVBS
+Yahoo! Taiwan Specials without layoutYahoo! Taiwan Specials without layout
+Yahoo奇摩video.yahoo.com(勿用)
+Yahoo奇摩Yahoo奇摩(爆新聞)
+Yahoo奇摩Yahoo奇摩(新聞)
+Yahoo奇摩Yahoo奇摩(影音)
+Yahoo奇摩新聞Yahoo奇摩新聞(報氣象)
+YourNewsYourNews
+Y特別企畫Y特別企畫
+愛爾達愛爾達
+愛爾達愛爾達電視
+財訊快報財訊快報
+財訊快報財訊快報季刊
+財訊快報季刊財訊快報季刊
+財訊快報季刊財訊快報季刊
+財訊雙週刊財訊雙週刊
+常春月刊常春月刊
+朝鮮日報朝鮮日報
+達志達志
+達志達志
+達志達志
+大家健康雜誌大家健康雜誌
+大師輕鬆讀大師輕鬆讀
+大台灣旅遊網大台灣旅遊網
+東森新聞東森新聞
+東森新聞東森新聞
+東星東星
+俄羅斯新聞網俄羅斯新聞網
+法新社法新社
+非凡新聞非凡新聞
+非凡新聞節目非凡新聞節目
+富爾特消費新聞富爾特消費新聞
+公共電視公共電視
+公視公視
+古美術古美術
+管理雜誌管理雜誌
+光華雜誌台灣光華雜誌
+廣編特輯廣編特輯
+廣告雜誌廣告雜誌
+國際商情雙周刊國際商情雙周刊
+哈佛商業評論哈佛商業評論
+韓國朝鮮日報韓國朝鮮日報
+韓國中央日報韓國中央日報
+韓國中央日報韓國中央日報
+韓星網韓星網
+韓星網韓星網
+華人健康網華人健康網
+華視華視
+華視華視
+華視華視
+環境資訊中心環境資訊中心
+健康醫療網健康醫療網
+健康醫療網健康醫療網
+講義雜誌講義雜誌
+教育廣播電台國立教育廣播電台
+今藝術今藝術
+今周刊今周刊
+今周刊今周刊
+經理人經理人月刊
+鉅亨網鉅亨網
+軍聞社軍聞社
+卡優新聞網卡優新聞網
+康健雜誌康健雜誌
+科學人科學人雜誌
+客家電視客家電視台
+酷搜圖聞酷搜圖聞
+理財周刊理財周刊
+麗台運動報麗台運動報
+聯合文學聯合文學
+聯合新聞網聯合新聞網
+路透社路透社
+路透社路透社
+旅遊經旅遊經
+羅開Golf 頻道羅開Golf 頻道
+媽媽寶寶媽媽寶寶
+美麗佳人美麗佳人雜誌
+美聯社美聯社
+美通社美通社
+民視民視
+民視民視
+明報周刊明報周刊
+男人幫男人幫
+能力雜誌能力雜誌
+年代新聞年代新聞
+年代新聞年代新聞
+年代新聞年代新聞
+年代新聞年代新聞
+紐約時報中文網.紐約時報中文網.
+紐約時報中文網.紐約時報中文網.
+紐約時報中文網紐約時報中文網
+儂儂雜誌儂儂雜誌
+蓬勃網球蓬勃網球
+蘋果日報蘋果日報
+親子天下親子天下
+全國廣播全國廣播
+全球中央全球中央雜誌
+商業周刊商業周刊
+數位家庭數位家庭
+數位時代數位時代
+台灣立報台灣立報
+台灣新生報台灣新生報
+台灣醒報台灣醒報
+台灣醒報台灣醒報
+臺灣時報臺灣時報
+天下雜誌天下雜誌
+統一獅 Video統一獅 Video
+玩高爾夫玩高爾夫
+旺報旺報
+先探投資週刊先探投資週刊
+現代保險健康理財雜誌現代保險健康理財雜誌
+香港中文大學EMBA Videos香港中文大學EMBA
+新頭殼新頭殼
+新新聞周刊新新聞周刊
+兄弟象 Video兄弟象 Video
+訊息快遞Yahoo 奇摩新聞訊息快遞
+野球人野球人
+壹電視壹電視
+壹電視壹電視
+壹蘋果壹蘋果
+義大犀牛 Video義大犀牛 Video
+優活健康網優活健康網
+原視原視
+遠見雜誌遠見雜誌
+張老師月刊張老師月刊
+整形達人整形達人
+中廣中廣新聞網
+中華日報中華日報
+中華職棒中華職棒
+中時電子報中時電子報
+中央廣播電台中央廣播電台
+中央日報中央日報
+中央社中央社
+中央社中央社
+自立晚報自立晚報
+自由時報自由時報
+自由時報自由時報
diff --git a/searchlib/src/tests/sort/sort_test.cpp b/searchlib/src/tests/sort/sort_test.cpp
new file mode 100644
index 00000000000..cf5e1a1cb1f
--- /dev/null
+++ b/searchlib/src/tests/sort/sort_test.cpp
@@ -0,0 +1,295 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/vespalib/util/array.h>
+#include <vector>
+#include <fstream>
+#include <iostream>
+#include <iomanip>
+#include <stdexcept>
+#include <unicode/ustring.h>
+
+LOG_SETUP("sort_test");
+
+using vespalib::Array;
+using namespace search::common;
+using vespalib::ConstBufferRef;
+
+/**
+ * Test application for the sort utilities; Main() runs every test case
+ * declared here.
+ */
+class Test : public vespalib::TestApp
+{
+public:
+    /// Runs all test cases below.
+    int Main();
+
+    // Numeric radix sort.
+    void testUnsignedIntegerSort();
+    template <typename T>
+    void testSignedIntegerSort();
+
+    // String radix sort.
+    void testStringSort();
+    void testStringCaseInsensitiveSort();
+
+    // Sort specification parsing, ICU conversion and UCA collation.
+    void testSortSpec();
+    void testIcu();
+    void testSameAsJavaOrder();
+};
+
+/**
+ * Element type for the string radix-sort test: a borrowed C string together
+ * with a cursor recording how far ValueRadix has consumed it.
+ */
+struct LoadedStrings
+{
+    /// Stores @p v without copying it; the cursor starts at the first character.
+    LoadedStrings(const char * v = nullptr) : _value(v), _currRadix(_value) { }
+
+    /// Radix functor: returns the character under the cursor and advances the cursor.
+    class ValueRadix
+    {
+    public:
+        char operator () (LoadedStrings & x) const {
+            const unsigned char c(*x._currRadix);
+            if (c) {
+                x._currRadix++;  // the cursor never moves past the terminating NUL
+            }
+            return c;
+        }
+    };
+
+    /// Less-than on the complete string, using strcmp.
+    class ValueCompare {
+    public:
+        // std::binary_function was deprecated in C++11 and removed in C++17.
+        // The typedefs it supplied are spelled out so code relying on them still compiles.
+        typedef LoadedStrings first_argument_type;
+        typedef LoadedStrings second_argument_type;
+        typedef bool          result_type;
+
+        bool operator() (const LoadedStrings & x, const LoadedStrings & y) const {
+            return strcmp(x._value, y._value) < 0;
+        }
+    };
+    const char * _value;      // complete string; the struct never frees it
+    const char * _currRadix;  // next character ValueRadix will return
+};
+
+/**
+ * Converts a long UTF-8 string into a 100-unit UTF-16 buffer with
+ * u_strFromUTF8 and accepts any of: success, U_INVALID_CHAR_FOUND, or
+ * U_BUFFER_OVERFLOW_ERROR together with a required length above the buffer size.
+ *
+ * NOTE(review): the literal contains the escape "\t" (in "string\this") -
+ * possibly a typo for ";t". It is reproduced unchanged.
+ */
+void Test::testIcu()
+{
+    const std::string utf8("Creation of Bob2007 this is atumated string\this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string; _ 12345567890-=,./;'[;");
+    std::vector<uint16_t> utf16(100);
+    int32_t unitsNeeded = 0;
+    UErrorCode rc = U_ZERO_ERROR;
+
+    u_strFromUTF8(&utf16[0], utf16.size(), &unitsNeeded, utf8.c_str(), -1, &rc);
+
+    const bool overflowReported = (rc == U_BUFFER_OVERFLOW_ERROR) && (unitsNeeded > (int)utf16.size());
+    ASSERT_TRUE(U_SUCCESS(rc) || (rc == U_INVALID_CHAR_FOUND) || overflowReported);
+}
+
+/**
+ * Runs NumericRadixSorter<uint32_t, true> on an empty range, on a single
+ * element, and on 0x100000 rand_r values (seed 1), asserting the last
+ * result is non-descending.
+ */
+void Test::testUnsignedIntegerSort()
+{
+    search::NumericRadixSorter<uint32_t, true> sorter;
+
+    // Empty range.
+    sorter(NULL, 0);
+
+    // One element must come back unchanged.
+    Array<uint32_t> single(1);
+    single[0] = 1567;
+    sorter(&single[0], 1);
+    ASSERT_TRUE(single[0] == 1567);
+
+    // Large pseudo-random input with a fixed seed.
+    unsigned int count = 0x100000;
+    Array<uint32_t> values(count);
+    unsigned seed = 1;
+    for (size_t idx = 0; idx < count; ++idx) {
+        values[idx] = rand_r(&seed);
+    }
+    sorter(&values[0], count);
+    for (size_t idx = 1; idx < count; ++idx) {
+        ASSERT_TRUE(values[idx - 1] <= values[idx]);
+    }
+}
+
+/**
+ * Maps an integer to a 64-bit unsigned key by XOR-ing it with the minimum
+ * value of its type (zero for unsigned types). For signed T, negative values
+ * get smaller keys than non-negative ones, so comparing keys agrees with
+ * comparing the original values.
+ */
+template<typename T>
+class IntOrder {
+public:
+    // min() replaces the former max() + 1, which overflowed (undefined behaviour)
+    // for int and int64_t and produced a non-monotonic key for narrower signed types.
+    uint64_t operator () (T v) const { return v ^ std::numeric_limits<T>::min(); }
+};
+
+/**
+ * Signed counterpart of testUnsignedIntegerSort for element type T: sorts an
+ * empty range, a single element, and 0x100000 rand_r values whose sign
+ * alternates with the index, asserting the last result is non-descending.
+ */
+template <typename T>
+void Test::testSignedIntegerSort()
+{
+    search::NumericRadixSorter<T, true> sorter;
+
+    // Empty range.
+    sorter(NULL, 0);
+
+    // One element must come back unchanged.
+    Array<T> single(1);
+    single[0] = 1567;
+    sorter(&single[0], 1);
+    ASSERT_TRUE(single[0] == 1567);
+
+    // Odd indices keep the random value, even indices get it negated.
+    unsigned int count = 0x100000;
+    Array<T> values(count);
+    unsigned seed = 1;
+    for (size_t idx = 0; idx < count; ++idx) {
+        const T magnitude = rand_r(&seed);
+        if (idx % 2) {
+            values[idx] = magnitude;
+        } else {
+            values[idx] = -magnitude;
+        }
+    }
+    sorter(&values[0], count);
+    for (size_t idx = 1; idx < count; ++idx) {
+        ASSERT_TRUE(values[idx - 1] <= values[idx]);
+    }
+}
+
+/**
+ * Calls search::radix_sort on LoadedStrings for an empty range, a single
+ * "a", and "a" followed by 0x0fff empty strings; after the last sort no
+ * element may compare less than its predecessor.
+ */
+void Test::testStringSort()
+{
+    unsigned int count = 0x1000;
+    std::vector<uint32_t> scratch(count);
+
+    // Empty range.
+    search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, static_cast<LoadedStrings *>(NULL), 0, &scratch[0], 0);
+
+    // Single element.
+    Array<LoadedStrings> single(1);
+    single[0] = LoadedStrings("a");
+    search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &single[0], 1, &scratch[0], 0);
+    ASSERT_TRUE(strcmp(single[0]._value, "a") == 0);
+
+    // One non-empty string in front of many empty ones.
+    Array<LoadedStrings> strings(count);
+    strings[0] = LoadedStrings("a");
+    for (size_t idx = 1; idx < count; ++idx) {
+        strings[idx] = LoadedStrings("");
+    }
+    search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &strings[0], count, &scratch[0], 0);
+
+    LoadedStrings::ValueCompare precedes;
+    for (size_t idx = 1; idx < count; ++idx) {
+        ASSERT_TRUE( ! precedes(strings[idx], strings[idx - 1]));
+    }
+}
+
+// Placeholder: the body is empty, so this test case currently checks nothing.
+void Test::testStringCaseInsensitiveSort()
+{
+}
+
+/**
+ * Parses a series of descending sort specifications on the field "name" and
+ * checks, for each one, the entry count, field name, direction and the
+ * dynamic type of the attached converter. The last case checks that an
+ * unknown UCA strength is rejected with a std::runtime_error.
+ */
+void Test::testSortSpec()
+{
+    // Plain field: no converter.
+    {
+        SortSpec sortspec("-name");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() == nullptr);
+    }
+
+    // lowercase(...): LowercaseConverter.
+    {
+        SortSpec sortspec("-lowercase(name)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<LowercaseConverter *>(first._converter.get()) != nullptr);
+    }
+
+    // uca(field, locale): UcaConverter.
+    {
+        SortSpec sortspec("-uca(name,nn_no)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+
+    // uca(field, locale, strength) for each accepted strength name.
+    {
+        SortSpec sortspec("-uca(name,nn_no,PRIMARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,SECONDARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,TERTIARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,QUATERNARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,IDENTICAL)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+
+    // Other locale strings, including one that names no real locale.
+    {
+        SortSpec sortspec("-uca(name,zh)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+    {
+        SortSpec sortspec("-uca(name,finnes_ikke)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        auto && first = sortspec[0];
+        EXPECT_EQUAL(first._field, "name");
+        EXPECT_TRUE(!first._ascending);
+        EXPECT_TRUE(first._converter.get() != nullptr);
+        EXPECT_TRUE(dynamic_cast<UcaConverter *>(first._converter.get()) != nullptr);
+    }
+
+    // Unknown strength: construction must throw with exactly this message.
+    {
+        try {
+            SortSpec sortspec("-uca(name,nn_no,NTERTIARY)");
+            EXPECT_TRUE(false);
+        } catch (const std::runtime_error & e) {
+            EXPECT_TRUE(true);
+            EXPECT_TRUE(strcmp(e.what(), "Illegal uca collation strength : NTERTIARY") == 0);
+        }
+    }
+}
+
+/**
+ * Reads javaorder.zh line by line and checks that the lines are already in
+ * non-descending order under a UcaConverter("zh", "PRIMARY"): both the
+ * converted sort keys and the collator's own compareUTF8 must agree that
+ * each line is not less than the one before it.
+ */
+void Test::testSameAsJavaOrder()
+{
+    std::vector<vespalib::string> javaOrder;
+    std::ifstream is("javaorder.zh");
+    EXPECT_TRUE(is.is_open());
+    // Using getline as the loop condition stops on EOF and on stream failure.
+    // The earlier `while (!is.eof())` form never terminated when the file could
+    // not be opened, and silently dropped a final line lacking a newline.
+    std::string line;
+    while (std::getline(is, line)) {
+        javaOrder.push_back(line);
+    }
+    EXPECT_EQUAL(158u, javaOrder.size());
+    if (javaOrder.empty()) {
+        return;  // nothing to compare; also keeps javaOrder[0] below in bounds
+    }
+    search::common::UcaConverter uca("zh", "PRIMARY");
+    vespalib::ConstBufferRef fkey = uca.convert(vespalib::ConstBufferRef(javaOrder[0].c_str(), javaOrder[0].size()));
+    // Keys are copied into strings because each convert() result is only used
+    // before the next convert() call here.
+    // NOTE(review): whether the returned buffer is actually invalidated by the next call is not visible here.
+    vespalib::string prev(fkey.c_str(), fkey.size());
+    for (size_t i(1); i < javaOrder.size(); i++) {
+        vespalib::ConstBufferRef key = uca.convert(vespalib::ConstBufferRef(javaOrder[i].c_str(), javaOrder[i].size()));
+        vespalib::HexDump dump(key.c_str(), key.size());
+        vespalib::string current(key.c_str(), key.size());
+        UErrorCode status(U_ZERO_ERROR);
+        UCollationResult cr = uca.getCollator().compareUTF8(javaOrder[i-1].c_str(), javaOrder[i].c_str(), status);
+        std::cout << std::setw(3) << i << ": " << status << "(" << u_errorName(status) << ") - " << cr << " '" << dump << "' : '" << javaOrder[i] << "'" << std::endl;
+        EXPECT_TRUE(prev <= current);
+        EXPECT_TRUE(U_SUCCESS(status));
+        EXPECT_TRUE(cr == UCOL_LESS || cr == UCOL_EQUAL);
+        prev = current;
+    }
+}
+
+
+// NOTE(review): presumably expands to the program entry point that runs Test - confirm in testapp.h.
+TEST_APPHOOK(Test);
+
+// Runs every test case of this file in a fixed order between TEST_INIT and TEST_DONE.
+int Test::Main()
+{
+ TEST_INIT("sort_test");
+
+ // Numeric radix sort: unsigned, then signed 32-bit and 64-bit.
+ testUnsignedIntegerSort();
+ testSignedIntegerSort<int32_t>();
+ testSignedIntegerSort<int64_t>();
+ // String radix sort (the case-insensitive variant has an empty body).
+ testStringSort();
+ testStringCaseInsensitiveSort();
+ // Sort specification parsing, ICU conversion, and UCA ordering of javaorder.zh.
+ testSortSpec();
+ testIcu();
+ testSameAsJavaOrder();
+
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/sort/sortbenchmark.cpp b/searchlib/src/tests/sort/sortbenchmark.cpp
new file mode 100644
index 00000000000..1309cf57d5d
--- /dev/null
+++ b/searchlib/src/tests/sort/sortbenchmark.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/vespalib/util/array.h>
+#include <vector>
+
+LOG_SETUP("sort_test");
+
+using vespalib::Array;
+using vespalib::ConstBufferRef;
+
+/**
+ * Benchmark application: fills _data with generated vectors and then either
+ * concatenates and sorts them or merges them, depending on the arguments
+ * handled in Main().
+ */
+class Test : public vespalib::TestApp
+{
+public:
+    typedef std::vector<uint32_t> V;
+
+    int Main();
+
+    /// Resizes _data to numVectors vectors of `values` elements each.
+    void generateVectors(size_t numVectors, size_t values);
+    /// Returns all vectors of _data appended one after another.
+    V cat() const;
+    /// Merges _data via twoWayMerge() and returns the first remaining vector.
+    V merge();
+    /// Merges neighbouring vectors of _data pairwise until at most one is left.
+    void twoWayMerge();
+
+    std::vector<V> _data;
+};
+
+/**
+ * Resizes _data to numVectors vectors and fills each of them with the
+ * ascending sequence 0, 1, ..., values-1.
+ */
+void Test::generateVectors(size_t numVectors, size_t values)
+{
+    _data.resize(numVectors);
+    for (V & column : _data) {
+        column.resize(values);
+        for (size_t pos = 0; pos < values; ++pos) {
+            column[pos] = pos;
+        }
+    }
+}
+
+/**
+ * Merges all vectors in _data into one via twoWayMerge() and returns a copy
+ * of the result. _data is modified in the process.
+ *
+ * Returns an empty vector when _data holds no vectors; indexing _data[0] in
+ * that case would be out of bounds.
+ */
+Test::V Test::merge()
+{
+    if (_data.empty()) {
+        return V();
+    }
+    twoWayMerge();
+    return _data[0];
+}
+
+/**
+ * Repeatedly replaces _data by the pairwise std::merge of its neighbouring
+ * vectors (an unpaired last vector is carried over as is) until at most one
+ * vector remains. At least one pass is always made.
+ */
+void Test::twoWayMerge()
+{
+    do {
+        const size_t pairs = _data.size() / 2;
+        std::vector<V> next((_data.size() + 1) / 2);
+
+        for (size_t p = 0; p < pairs; ++p) {
+            const V & left = _data[2 * p];
+            const V & right = _data[2 * p + 1];
+            V & out = next[p];
+            out.resize(left.size() + right.size());
+            std::merge(left.begin(), left.end(), right.begin(), right.end(), out.begin());
+        }
+        if (_data.size() % 2 != 0) {
+            // Odd count: move the unpaired tail across without copying.
+            next.back().swap(_data.back());
+        }
+        _data.swap(next);
+    } while (_data.size() > 1);
+}
+
+/**
+ * Returns one vector holding the elements of every vector in _data, in
+ * order. The total size is computed first so the result is reserved once.
+ */
+Test::V Test::cat() const
+{
+    size_t total = 0;
+    for (const V & part : _data) {
+        total += part.size();
+    }
+
+    V joined;
+    joined.reserve(total);
+    for (const V & part : _data) {
+        joined.insert(joined.end(), part.begin(), part.end());
+    }
+    return joined;
+}
+
+TEST_APPHOOK(Test);
+
+/**
+ * Benchmark driver.
+ *
+ * Optional positional arguments: [values [numVectors [type]]], where type is
+ * "merge", "qsort", or anything else for the radix sorter (the default is
+ * "radix"). Generates the vectors, concatenates them, and then runs the
+ * selected algorithm, printing progress to stdout.
+ */
+int Test::Main()
+{
+    TEST_INIT("sortbenchmark");
+    size_t numVectors(11);
+    size_t values(10000000);
+    vespalib::string type("radix");
+    if (_argc > 1) {
+        values = strtol(_argv[1], NULL, 0);
+    }
+    if (_argc > 2) {
+        numVectors = strtol(_argv[2], NULL, 0);
+    }
+    // _argv[3] exists only when _argc > 3. The former `_argc > 2` check read the
+    // terminating NULL entry when exactly two arguments were given.
+    if (_argc > 3) {
+        type = _argv[3];
+    }
+
+    // size_t is printed with %zu; %ld only matches on platforms where size_t is long-sized.
+    printf("Start with %zu vectors with %zu values and type '%s'(radix, qsort, merge)\n", numVectors, values, type.c_str());
+    generateVectors(numVectors, values);
+    printf("Start cat\n");
+    V v = cat();
+    printf("Cat %zu values\n", v.size());
+    if (type == "merge") {
+        V m = merge();
+        printf("Merged %zu values\n", m.size());
+    } else if (type == "qsort") {
+        std::sort(v.begin(), v.end());
+        printf("sorted %zu value with std::sort\n", v.size());
+    } else {
+        search::NumericRadixSorter<uint32_t, true> S;
+        // &v[0] on an empty vector is undefined; pass NULL with size 0 instead.
+        S(v.empty() ? NULL : &v[0], v.size());
+        printf("sorted %zu value with radix::sort\n", v.size());
+    }
+
+    TEST_DONE();
+}
diff --git a/searchlib/src/tests/sort/uca.cpp b/searchlib/src/tests/sort/uca.cpp
new file mode 100644
index 00000000000..b9225c94a66
--- /dev/null
+++ b/searchlib/src/tests/sort/uca.cpp
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/vespalib/util/array.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include <unicode/ustring.h>
+
+LOG_SETUP("uca_stress");
+
+using icu::Collator;
+
+/// Test application whose single test case, testFromDat(), is run by Main().
+class Test : public vespalib::TestApp
+{
+public:
+    /// Exercises Collator::getSortKey on the records of sort-blobs.dat.
+    void testFromDat();
+    int Main();
+};
+
+
+/**
+ * Stress test for icu::Collator::getSortKey using records read from
+ * sort-blobs.dat.
+ *
+ * Record layout as read below: 4-byte mark "abc", 4-byte length `len`,
+ * 4-byte mark "def", len 16-bit units of text, 4-byte mark "ghi". The marks
+ * are compared as C strings, so each is three characters plus a NUL.
+ *
+ * For every record the required key size is queried, then getSortKey is
+ * called with every claimed capacity from 1 to wanted+7 on a buffer
+ * pre-filled with 0x99, and any byte written beyond the claimed capacity is
+ * counted or reported. The test expects no such write.
+ *
+ * If the data file cannot be opened, nothing is checked; this matches the
+ * previous behaviour but is now reported on stdout.
+ */
+void Test::testFromDat()
+{
+    size_t badnesses = 0;
+
+    std::string startMark("abc");
+    std::string midMark("def");
+    std::string endMark("ghi");
+
+    UErrorCode status = U_ZERO_ERROR;
+    auto coll = std::unique_ptr<Collator>(Collator::createInstance(icu::Locale("en"), status));
+    EXPECT_TRUE(U_SUCCESS(status));
+    EXPECT_TRUE(coll.get() != nullptr);
+    if (!U_SUCCESS(status) || !coll) {
+        return;  // no collator to test; avoids dereferencing a null pointer
+    }
+
+    coll->setStrength(Collator::PRIMARY);
+
+    std::vector<uint16_t> u16buffer(100);
+    std::vector<uint8_t> u8buffer(10);
+
+    int fd = open("sort-blobs.dat", O_RDONLY);
+    if (fd < 0) {
+        printf("could not open sort-blobs.dat, nothing checked\n");
+        return;
+    }
+    // One extra byte that is never overwritten keeps sbuf NUL-terminated even
+    // if the file's 4-byte marks are not.
+    char sbuf[5] = { 0 };
+
+    int num=0;
+
+    uint32_t atleast = 0;
+
+    while (read(fd, sbuf, 4) == 4) {
+        if (startMark == sbuf) {
+            uint32_t len = 0;
+            int r = read(fd, &len, 4);
+
+            EXPECT_EQUAL(4, r);
+            r = read(fd, sbuf, 4);
+            EXPECT_EQUAL(4, r);
+            EXPECT_EQUAL(midMark, sbuf);
+
+            if (u16buffer.size() < len) {
+                u16buffer.resize(len);
+            }
+            r = read(fd, &u16buffer[0], len*2);
+            EXPECT_EQUAL((int)len*2, r);
+
+            r = read(fd, sbuf, 4);
+            EXPECT_EQUAL(4, r);
+            EXPECT_EQUAL(endMark, sbuf);
+
+            // Null destination with capacity 0: only asks for the required size.
+            uint32_t wanted = coll->getSortKey(&u16buffer[0], len, NULL, 0);
+
+            EXPECT_TRUE(wanted > 0);
+            EXPECT_TRUE(wanted >= len);
+            EXPECT_TRUE(wanted < len*6);
+
+            // Slack of 20 bytes so the canary checks below stay inside the buffer.
+            if (wanted + 20 > u8buffer.size()) {
+                u8buffer.resize(wanted+20);
+            }
+
+            for (uint32_t pretend = 1; pretend < wanted+8; ++pretend) {
+                memset(&u8buffer[0], 0x99, u8buffer.size());
+                uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], pretend);
+                EXPECT_EQUAL(wanted, got);
+
+                if (u8buffer[pretend+1] != 0x99) {
+                    printf("wrote 2 bytes too far: wanted space %u, pretend allocated %u, last good=%02x, bad=%02x %02x\n",
+                           wanted, pretend, u8buffer[pretend-1],
+                           u8buffer[pretend], u8buffer[pretend+1]);
+                } else if (u8buffer[pretend] != 0x99) {
+                    ++badnesses;
+                    // Only report when a new largest key size shows the problem.
+                    if (wanted > atleast) {
+                        atleast = wanted;
+                        printf("wrote 1 byte too far: wanted space %u, pretend allocated %u, last good=%02x, bad=%02x\n",
+                               wanted, pretend, u8buffer[pretend-1], u8buffer[pretend]);
+                    }
+                }
+            }
+
+            memset(&u8buffer[0], 0x99, u8buffer.size());
+            uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], u8buffer.size());
+            EXPECT_EQUAL(wanted, got);
+
+            // Index only when got is inside the buffer; a wrong size was already reported above.
+            if (got > 0 && got < u8buffer.size()) {
+                EXPECT_EQUAL('\0', u8buffer[got-1]);
+                EXPECT_EQUAL((uint8_t)0x99, u8buffer[got]);
+            }
+        }
+        if (++num >= 10000) {
+            TEST_FLUSH();
+            num=0;
+        }
+    }
+    close(fd);
+    EXPECT_EQUAL(0u, badnesses);
+}
+
+// NOTE(review): presumably expands to the program entry point that runs Test - confirm in testapp.h.
+TEST_APPHOOK(Test);
+
+// Runs the single test case, testFromDat(), between TEST_INIT and TEST_DONE.
+int Test::Main()
+{
+ TEST_INIT("uca_stress");
+ testFromDat();
+ TEST_DONE();
+}