about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2016-09-11 20:50:26 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2016-09-11 20:51:08 +0000
commit 9f13fec939fd0a28547753d122eccd1f2e19785e (patch)
tree 274f4944903bdc31efe134e94b1ef7648e390b9c /searchlib
parent 0228ffd55f264315531ada146c113aed6efc2f22 (diff)
Do not require icu unless you really need it.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/CMakeLists.txt 2
-rw-r--r-- searchlib/src/tests/forcelink/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/sort/CMakeLists.txt 2
-rw-r--r-- searchlib/src/tests/sort/sort_test.cpp 27
-rw-r--r-- searchlib/src/tests/sort/uca.cpp 1
-rw-r--r-- searchlib/src/tests/sortspec/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/sortspec/multilevelsort.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/common/converters.h 46
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortresults.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortresults.h 14
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortspec.cpp 77
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortspec.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/expression/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/uca/CMakeLists.txt 10
-rw-r--r-- searchlib/src/vespa/searchlib/uca/ucaconverter.cpp 98
-rw-r--r-- searchlib/src/vespa/searchlib/uca/ucaconverter.h 63
-rw-r--r-- searchlib/src/vespa/searchlib/uca/ucafunctionnode.cpp (renamed from searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp) 8
-rw-r--r-- searchlib/src/vespa/searchlib/uca/ucafunctionnode.h (renamed from searchlib/src/vespa/searchlib/expression/ucafunctionnode.h) 0
19 files changed, 222 insertions, 142 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index c03969bc928..df3e40ebd6d 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -16,7 +16,6 @@ vespa_define_module(
searchcommon
EXTERNAL_DEPENDS
- icui18n
rt
LIBS
@@ -52,6 +51,7 @@ vespa_define_module(
src/vespa/searchlib/test/fakedata
src/vespa/searchlib/test/memoryindex
src/vespa/searchlib/transactionlog
+ src/vespa/searchlib/uca
src/vespa/searchlib/util
APPS
diff --git a/searchlib/src/tests/forcelink/CMakeLists.txt b/searchlib/src/tests/forcelink/CMakeLists.txt
index 48c9b4fd1a3..59031c2cd4d 100644
--- a/searchlib/src/tests/forcelink/CMakeLists.txt
+++ b/searchlib/src/tests/forcelink/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_forcelink_test_app TEST
forcelink.cpp
DEPENDS
searchlib
+ searchlib_searchlib_uca
)
vespa_add_test(NAME searchlib_forcelink_test_app COMMAND searchlib_forcelink_test_app)
diff --git a/searchlib/src/tests/sort/CMakeLists.txt b/searchlib/src/tests/sort/CMakeLists.txt
index 1830952bffd..54f2fcdd3d7 100644
--- a/searchlib/src/tests/sort/CMakeLists.txt
+++ b/searchlib/src/tests/sort/CMakeLists.txt
@@ -11,6 +11,7 @@ vespa_add_executable(searchlib_sort_test_app
sort_test.cpp
DEPENDS
searchlib
+ searchlib_searchlib_uca
)
#vespa_add_test(NAME searchlib_sort_test_app COMMAND searchlib_sort_test_app)
vespa_add_executable(searchlib_uca_stress_app
@@ -18,5 +19,6 @@ vespa_add_executable(searchlib_uca_stress_app
uca.cpp
DEPENDS
searchlib
+ searchlib_searchlib_uca
)
vespa_add_test(NAME searchlib_uca_stress_app COMMAND searchlib_uca_stress_app BENCHMARK)
diff --git a/searchlib/src/tests/sort/sort_test.cpp b/searchlib/src/tests/sort/sort_test.cpp
index cf5e1a1cb1f..1af12c7aecb 100644
--- a/searchlib/src/tests/sort/sort_test.cpp
+++ b/searchlib/src/tests/sort/sort_test.cpp
@@ -5,6 +5,7 @@
#include <vespa/searchlib/common/sort.h>
#include <vespa/searchlib/common/sortspec.h>
#include <vespa/searchlib/common/converters.h>
+#include <vespa/searchlib/uca/ucaconverter.h>
#include <vespa/vespalib/util/array.h>
#include <vector>
#include <fstream>
@@ -17,6 +18,7 @@ LOG_SETUP("sort_test");
using vespalib::Array;
using namespace search::common;
+using namespace search::uca;
using vespalib::ConstBufferRef;
class Test : public vespalib::TestApp
@@ -154,8 +156,9 @@ void Test::testStringCaseInsensitiveSort()
void Test::testSortSpec()
{
+ UcaConverterFactory ucaFactory;
{
- SortSpec sortspec("-name");
+ SortSpec sortspec("-name", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -163,7 +166,7 @@ void Test::testSortSpec()
}
{
- SortSpec sortspec("-lowercase(name)");
+ SortSpec sortspec("-lowercase(name)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -172,7 +175,7 @@ void Test::testSortSpec()
}
{
- SortSpec sortspec("-uca(name,nn_no)");
+ SortSpec sortspec("-uca(name,nn_no)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -180,7 +183,7 @@ void Test::testSortSpec()
EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
}
{
- SortSpec sortspec("-uca(name,nn_no,PRIMARY)");
+ SortSpec sortspec("-uca(name,nn_no,PRIMARY)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -188,7 +191,7 @@ void Test::testSortSpec()
EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
}
{
- SortSpec sortspec("-uca(name,nn_no,SECONDARY)");
+ SortSpec sortspec("-uca(name,nn_no,SECONDARY)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -196,7 +199,7 @@ void Test::testSortSpec()
EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
}
{
- SortSpec sortspec("-uca(name,nn_no,TERTIARY)");
+ SortSpec sortspec("-uca(name,nn_no,TERTIARY)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -204,7 +207,7 @@ void Test::testSortSpec()
EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
}
{
- SortSpec sortspec("-uca(name,nn_no,QUATERNARY)");
+ SortSpec sortspec("-uca(name,nn_no,QUATERNARY)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -212,7 +215,7 @@ void Test::testSortSpec()
EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
}
{
- SortSpec sortspec("-uca(name,nn_no,IDENTICAL)");
+ SortSpec sortspec("-uca(name,nn_no,IDENTICAL)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -220,7 +223,7 @@ void Test::testSortSpec()
EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
}
{
- SortSpec sortspec("-uca(name,zh)");
+ SortSpec sortspec("-uca(name,zh)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -228,7 +231,7 @@ void Test::testSortSpec()
EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
}
{
- SortSpec sortspec("-uca(name,finnes_ikke)");
+ SortSpec sortspec("-uca(name,finnes_ikke)", ucaFactory);
EXPECT_EQUAL(sortspec.size(), 1u);
EXPECT_EQUAL(sortspec[0]._field, "name");
EXPECT_TRUE( ! sortspec[0]._ascending);
@@ -237,7 +240,7 @@ void Test::testSortSpec()
}
{
try {
- SortSpec sortspec("-uca(name,nn_no,NTERTIARY)");
+ SortSpec sortspec("-uca(name,nn_no,NTERTIARY)", ucaFactory);
EXPECT_TRUE(false);
} catch (const std::runtime_error & e) {
EXPECT_TRUE(true);
@@ -258,7 +261,7 @@ void Test::testSameAsJavaOrder()
}
}
EXPECT_EQUAL(158u, javaOrder.size());
- search::common::UcaConverter uca("zh", "PRIMARY");
+ UcaConverter uca("zh", "PRIMARY");
vespalib::ConstBufferRef fkey = uca.convert(vespalib::ConstBufferRef(javaOrder[0].c_str(), javaOrder[0].size()));
vespalib::string prev(fkey.c_str(), fkey.size());
for (size_t i(1); i < javaOrder.size(); i++) {
diff --git a/searchlib/src/tests/sort/uca.cpp b/searchlib/src/tests/sort/uca.cpp
index b9225c94a66..121e53a9e7c 100644
--- a/searchlib/src/tests/sort/uca.cpp
+++ b/searchlib/src/tests/sort/uca.cpp
@@ -11,6 +11,7 @@
#include <vector>
#include <stdexcept>
#include <unicode/ustring.h>
+#include <unicode/coll.h>
LOG_SETUP("uca_stress");
diff --git a/searchlib/src/tests/sortspec/CMakeLists.txt b/searchlib/src/tests/sortspec/CMakeLists.txt
index a59a5c102f7..0522f1725a3 100644
--- a/searchlib/src/tests/sortspec/CMakeLists.txt
+++ b/searchlib/src/tests/sortspec/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_multilevelsort_test_app TEST
multilevelsort.cpp
DEPENDS
searchlib
+ searchlib_searchlib_uca
)
vespa_add_test(NAME searchlib_multilevelsort_test_app COMMAND searchlib_multilevelsort_test_app)
diff --git a/searchlib/src/tests/sortspec/multilevelsort.cpp b/searchlib/src/tests/sortspec/multilevelsort.cpp
index f151bfaf132..c65683a1754 100644
--- a/searchlib/src/tests/sortspec/multilevelsort.cpp
+++ b/searchlib/src/tests/sortspec/multilevelsort.cpp
@@ -10,6 +10,7 @@ LOG_SETUP("multilevelsort_test");
#include <vespa/searchlib/attribute/stringbase.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/uca/ucaconverter.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <map>
#include <sstream>
@@ -251,7 +252,8 @@ MultilevelSortTest::sortAndCheck(const std::vector<Spec> &spec, uint32_t num,
vespalib::Clock clock;
vespalib::Doom doom(clock, std::numeric_limits<long>::max());
- FastS_SortSpec sorter(doom, _sortMethod);
+ search::uca::UcaConverterFactory ucaFactory;
+ FastS_SortSpec sorter(doom, ucaFactory, _sortMethod);
// init sorter with sort data
for(uint32_t i = 0; i < spec.size(); ++i) {
AttributeGuard ag;
diff --git a/searchlib/src/vespa/searchlib/CMakeLists.txt b/searchlib/src/vespa/searchlib/CMakeLists.txt
index 0f0420e5b61..bf3cabb460b 100644
--- a/searchlib/src/vespa/searchlib/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/CMakeLists.txt
@@ -32,6 +32,5 @@ vespa_add_library(searchlib
INSTALL lib64
DEPENDS
staging_vespalib
- icuuc
atomic
)
diff --git a/searchlib/src/vespa/searchlib/common/converters.h b/searchlib/src/vespa/searchlib/common/converters.h
index ccd15c6105c..9eadeb5a8c5 100644
--- a/searchlib/src/vespa/searchlib/common/converters.h
+++ b/searchlib/src/vespa/searchlib/common/converters.h
@@ -3,7 +3,6 @@
#pragma once
#include <vespa/searchcommon/common/iblobconverter.h>
-#include <unicode/coll.h>
#include <vector>
#include <vespa/vespalib/stllike/string.h>
@@ -13,7 +12,7 @@ namespace common {
class PassThroughConverter : public BlobConverter
{
private:
- virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const;
+ virtual ConstBufferRef onConvert(const ConstBufferRef & src) const;
};
class LowercaseConverter : public BlobConverter
@@ -21,47 +20,16 @@ class LowercaseConverter : public BlobConverter
public:
LowercaseConverter();
private:
- virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const;
+ virtual ConstBufferRef onConvert(const ConstBufferRef & src) const;
mutable vespalib::string _buffer;
};
-class UcaConverter : public BlobConverter
-{
+class ConverterFactory {
+protected:
+ using stringref = vespalib::stringref;
public:
- typedef icu::Collator Collator;
- UcaConverter(const vespalib::string & locale, const vespalib::string & strength);
- const Collator & getCollator() const { return *_collator; }
-private:
- struct Buffer {
- vespalib::string _data;
- uint8_t *ptr() { return (uint8_t *)_data.begin(); }
- int32_t siz() { return _data.size(); }
- Buffer() : _data() {
- reserve(_data.capacity()-8); // do not cause extra malloc() by default
- }
- void reserve(size_t size) {
- _data.reserve(size+8);
- _data.resize(size);
- _data[size+1] = '\0';
- _data[size+2] = '\0';
- _data[size+3] = 'd';
- _data[size+4] = 'e';
- _data[size+5] = 'a';
- _data[size+6] = 'd';
- _data[size+7] = '\0';
- }
- void check() {
- assert(_data[siz()+3] == 'd');
- assert(_data[siz()+4] == 'e');
- assert(_data[siz()+5] == 'a');
- assert(_data[siz()+6] == 'd');
- }
- };
- int utf8ToUtf16(const vespalib::ConstBufferRef & src) const;
- virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const;
- mutable Buffer _buffer;
- mutable std::vector<UChar> _u16Buffer;
- std::unique_ptr<Collator> _collator;
+ virtual ~ConverterFactory() { }
+ virtual BlobConverter::UP create(stringref local, stringref strength) const = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp
index c58f15a8372..485c0d550b9 100644
--- a/searchlib/src/vespa/searchlib/common/sortresults.cpp
+++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp
@@ -302,8 +302,9 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
}
-FastS_SortSpec::FastS_SortSpec(const vespalib::Doom & doom, int method) :
+FastS_SortSpec::FastS_SortSpec(const vespalib::Doom & doom, const ConverterFactory & ucaFactory, int method) :
_doom(doom),
+ _ucaFactory(ucaFactory),
_method(method),
_sortSpec(),
_vectors()
@@ -323,7 +324,7 @@ FastS_SortSpec::Init(const vespalib::string & sortStr, IAttributeContext & vecMa
LOG(spam, "sortStr = %s", sortStr.c_str());
bool retval(true);
try {
- _sortSpec = SortSpec(sortStr);
+ _sortSpec = SortSpec(sortStr, _ucaFactory);
for (SortSpec::const_iterator it(_sortSpec.begin()), mt(_sortSpec.end()); retval && (it < mt); it++) {
retval = Add(vecMan, *it);
}
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.h b/searchlib/src/vespa/searchlib/common/sortresults.h
index 8da643411a0..712150bc2c5 100644
--- a/searchlib/src/vespa/searchlib/common/sortresults.h
+++ b/searchlib/src/vespa/searchlib/common/sortresults.h
@@ -122,19 +122,21 @@ private:
typedef vespalib::AutoAlloc<0x800000> Alloc;
typedef vespalib::Array<uint8_t, Alloc> BinarySortData;
typedef vespalib::Array<SortData, Alloc> SortDataArray;
- vespalib::Doom _doom;
- int _method;
+ using ConverterFactory = search::common::ConverterFactory;
+ vespalib::Doom _doom;
+ const ConverterFactory & _ucaFactory;
+ int _method;
search::common::SortSpec _sortSpec;
- VectorRefList _vectors;
- BinarySortData _binarySortData;
- SortDataArray _sortDataArray;
+ VectorRefList _vectors;
+ BinarySortData _binarySortData;
+ SortDataArray _sortDataArray;
bool Add(search::attribute::IAttributeContext & vecMan, const search::common::SortInfo & sInfo);
void initSortData(const search::RankedHit *a, uint32_t n);
uint8_t * realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData);
public:
- FastS_SortSpec(const vespalib::Doom & doom, int method=2);
+ FastS_SortSpec(const vespalib::Doom & doom, const ConverterFactory & ucaFactory, int method=2);
virtual ~FastS_SortSpec();
std::pair<const char *, size_t> getSortRef(size_t i) const {
diff --git a/searchlib/src/vespa/searchlib/common/sortspec.cpp b/searchlib/src/vespa/searchlib/common/sortspec.cpp
index b522d76ebaa..b81c812b830 100644
--- a/searchlib/src/vespa/searchlib/common/sortspec.cpp
+++ b/searchlib/src/vespa/searchlib/common/sortspec.cpp
@@ -41,78 +41,7 @@ ConstBufferRef LowercaseConverter::onConvert(const ConstBufferRef & src) const
return ConstBufferRef(_buffer.begin(), _buffer.size());
}
-namespace {
- vespalib::Lock _GlobalDirtyICUThreadSafeLock;
-}
-
-UcaConverter::UcaConverter(const vespalib::string & locale, const vespalib::string & strength) :
- _buffer(),
- _u16Buffer(128),
- _collator()
-{
- UErrorCode status = U_ZERO_ERROR;
- Collator *coll(NULL);
- {
- vespalib::LockGuard guard(_GlobalDirtyICUThreadSafeLock);
- coll = Collator::createInstance(icu::Locale(locale.c_str()), status);
- }
- if(U_SUCCESS(status)) {
- _collator.reset(coll);
- if (strength.empty()) {
- _collator->setStrength(Collator::PRIMARY);
- } else if (strength == "PRIMARY") {
- _collator->setStrength(Collator::PRIMARY);
- } else if (strength == "SECONDARY") {
- _collator->setStrength(Collator::SECONDARY);
- } else if (strength == "TERTIARY") {
- _collator->setStrength(Collator::TERTIARY);
- } else if (strength == "QUATERNARY") {
- _collator->setStrength(Collator::QUATERNARY);
- } else if (strength == "IDENTICAL") {
- _collator->setStrength(Collator::IDENTICAL);
- } else {
- throw std::runtime_error("Illegal uca collation strength : " + strength);
- }
- } else {
- delete coll;
- throw std::runtime_error("Failed Collator::createInstance(Locale(locale.c_str()), status) with locale : " + locale);
- }
-}
-
-int UcaConverter::utf8ToUtf16(const ConstBufferRef & src) const
-{
- UErrorCode status = U_ZERO_ERROR;
- int32_t u16Wanted(0);
- u_strFromUTF8(&_u16Buffer[0], _u16Buffer.size(), &u16Wanted, static_cast<const char *>(src.data()), -1, &status);
- if (U_SUCCESS(status)) {
- } else if (status == U_INVALID_CHAR_FOUND) {
- LOG(warning, "ICU was not able to convert the %ld alleged utf8 characters'%s' to utf16", src.size(), src.c_str());
- } else if (status == U_BUFFER_OVERFLOW_ERROR) {
- //Ignore as this is handled on the outside.
- } else {
- LOG(warning, "ICU made a undefined complaint(%d) about the %ld alleged utf8 characters'%s' to utf16", status, src.size(), src.c_str());
- }
- return u16Wanted;
-}
-
-ConstBufferRef UcaConverter::onConvert(const ConstBufferRef & src) const
-{
- int32_t u16Wanted(utf8ToUtf16(src));
- if (u16Wanted > (int)_u16Buffer.size()) {
- _u16Buffer.resize(u16Wanted);
- u16Wanted = utf8ToUtf16(src);
- }
- int wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz());
- _buffer.check();
- if (wanted > _buffer.siz()) {
- _buffer.reserve(wanted);
- wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz());
- _buffer.check();
- }
- return ConstBufferRef(_buffer.ptr(), wanted);
-}
-
-SortSpec::SortSpec(const vespalib::string & spec) :
+SortSpec::SortSpec(const vespalib::string & spec, const ConverterFactory & ucaFactory) :
_spec(spec)
{
for (const char *pt(spec.c_str()), *mt(spec.c_str() + spec.size()); pt < mt;) {
@@ -143,13 +72,13 @@ SortSpec::SortSpec(const vespalib::string & spec) :
for(; (p < e) && (*p != ')'); p++);
if (*p == ')') {
vespalib::string strength(strengthName, p - strengthName);
- push_back(SortInfo(attr, ascending, BlobConverter::SP(new UcaConverter(locale, strength))));
+ push_back(SortInfo(attr, ascending, BlobConverter::SP(ucaFactory.create(locale, strength))));
} else {
throw std::runtime_error(make_string("Missing ')' at %s attr=%s locale=%s strength=%s", p, attr.c_str(), localeName, strengthName));
}
} else if (*p == ')') {
vespalib::string locale(localeName, p-localeName);
- push_back(SortInfo(attr, ascending, BlobConverter::SP(new UcaConverter(locale, ""))));
+ push_back(SortInfo(attr, ascending, BlobConverter::SP(ucaFactory.create(locale, ""))));
} else {
throw std::runtime_error(make_string("Missing ')' or ',' at %s attr=%s locale=%s", p, attr.c_str(), localeName));
}
diff --git a/searchlib/src/vespa/searchlib/common/sortspec.h b/searchlib/src/vespa/searchlib/common/sortspec.h
index bfa6a064105..80b5cfaf795 100644
--- a/searchlib/src/vespa/searchlib/common/sortspec.h
+++ b/searchlib/src/vespa/searchlib/common/sortspec.h
@@ -9,6 +9,7 @@
#include <vector>
#include <vespa/vespalib/stllike/string.h>
#include <vespa/searchcommon/common/iblobconverter.h>
+#include <vespa/searchlib/common/converters.h>
namespace search {
namespace common {
@@ -24,7 +25,7 @@ class SortSpec : public std::vector<SortInfo>
{
public:
SortSpec() : _spec() { }
- SortSpec(const vespalib::string & spec);
+ SortSpec(const vespalib::string & spec, const ConverterFactory & ucaFactory);
const vespalib::string & getSpec() const { return _spec; }
private:
vespalib::string _spec;
diff --git a/searchlib/src/vespa/searchlib/expression/CMakeLists.txt b/searchlib/src/vespa/searchlib/expression/CMakeLists.txt
index 1940f9736be..a82f5625c3c 100644
--- a/searchlib/src/vespa/searchlib/expression/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/expression/CMakeLists.txt
@@ -17,7 +17,6 @@ vespa_add_library(searchlib_expression OBJECT
documentfieldnode.cpp
attributenode.cpp
zcurve.cpp
- ucafunctionnode.cpp
debugwaitfunctionnode.cpp
mathfunctionnode.cpp
numericfunctionnode.cpp
diff --git a/searchlib/src/vespa/searchlib/uca/CMakeLists.txt b/searchlib/src/vespa/searchlib/uca/CMakeLists.txt
new file mode 100644
index 00000000000..7ff31f7c7a0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/uca/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_uca
+ SOURCES
+ ucaconverter.cpp
+ ucafunctionnode.cpp
+ INSTALL lib64
+ DEPENDS
+ icui18n
+ icuuc
+)
diff --git a/searchlib/src/vespa/searchlib/uca/ucaconverter.cpp b/searchlib/src/vespa/searchlib/uca/ucaconverter.cpp
new file mode 100644
index 00000000000..b084e1a59fe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/uca/ucaconverter.cpp
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/uca/ucaconverter.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/sync.h>
+#include <unicode/coll.h>
+#include <unicode/ustring.h>
+#include <stdexcept>
+#include <vespa/fastlib/text/normwordfolder.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".search.common.sortspec");
+
+namespace search {
+namespace uca {
+
+using vespalib::ConstBufferRef;
+using vespalib::make_string;
+
+namespace {
+ vespalib::Lock _GlobalDirtyICUThreadSafeLock;
+}
+
+BlobConverter::UP
+UcaConverterFactory::create(stringref local, stringref strength) const {
+ return std::make_unique<UcaConverter>(local, strength);
+}
+
+UcaConverter::UcaConverter(vespalib::stringref locale, vespalib::stringref strength) :
+ _buffer(),
+ _u16Buffer(128),
+ _collator()
+{
+ UErrorCode status = U_ZERO_ERROR;
+ Collator *coll(NULL);
+ {
+ vespalib::LockGuard guard(_GlobalDirtyICUThreadSafeLock);
+ coll = Collator::createInstance(icu::Locale(locale.c_str()), status);
+ }
+ if(U_SUCCESS(status)) {
+ _collator.reset(coll);
+ if (strength.empty()) {
+ _collator->setStrength(Collator::PRIMARY);
+ } else if (strength == "PRIMARY") {
+ _collator->setStrength(Collator::PRIMARY);
+ } else if (strength == "SECONDARY") {
+ _collator->setStrength(Collator::SECONDARY);
+ } else if (strength == "TERTIARY") {
+ _collator->setStrength(Collator::TERTIARY);
+ } else if (strength == "QUATERNARY") {
+ _collator->setStrength(Collator::QUATERNARY);
+ } else if (strength == "IDENTICAL") {
+ _collator->setStrength(Collator::IDENTICAL);
+ } else {
+ throw std::runtime_error("Illegal uca collation strength : " + strength);
+ }
+ } else {
+ delete coll;
+ throw std::runtime_error("Failed Collator::createInstance(Locale(locale.c_str()), status) with locale : " + locale);
+ }
+}
+
+int UcaConverter::utf8ToUtf16(const ConstBufferRef & src) const
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t u16Wanted(0);
+ u_strFromUTF8(&_u16Buffer[0], _u16Buffer.size(), &u16Wanted, static_cast<const char *>(src.data()), -1, &status);
+ if (U_SUCCESS(status)) {
+ } else if (status == U_INVALID_CHAR_FOUND) {
+ LOG(warning, "ICU was not able to convert the %ld alleged utf8 characters'%s' to utf16", src.size(), src.c_str());
+ } else if (status == U_BUFFER_OVERFLOW_ERROR) {
+ //Ignore as this is handled on the outside.
+ } else {
+ LOG(warning, "ICU made a undefined complaint(%d) about the %ld alleged utf8 characters'%s' to utf16", status, src.size(), src.c_str());
+ }
+ return u16Wanted;
+}
+
+ConstBufferRef UcaConverter::onConvert(const ConstBufferRef & src) const
+{
+ int32_t u16Wanted(utf8ToUtf16(src));
+ if (u16Wanted > (int)_u16Buffer.size()) {
+ _u16Buffer.resize(u16Wanted);
+ u16Wanted = utf8ToUtf16(src);
+ }
+ int wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz());
+ _buffer.check();
+ if (wanted > _buffer.siz()) {
+ _buffer.reserve(wanted);
+ wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz());
+ _buffer.check();
+ }
+ return ConstBufferRef(_buffer.ptr(), wanted);
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/uca/ucaconverter.h b/searchlib/src/vespa/searchlib/uca/ucaconverter.h
new file mode 100644
index 00000000000..c71cabda87c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/uca/ucaconverter.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/common/iblobconverter.h>
+#include <unicode/coll.h>
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+using common::BlobConverter;
+using common::ConverterFactory;
+
+namespace uca {
+
+class UcaConverterFactory : public ConverterFactory {
+public:
+ BlobConverter::UP create(stringref local, stringref strength) const override;
+};
+
+class UcaConverter : public BlobConverter
+{
+public:
+ using Collator = icu::Collator;
+ UcaConverter(vespalib::stringref locale, vespalib::stringref strength);
+ const Collator & getCollator() const { return *_collator; }
+private:
+ struct Buffer {
+ vespalib::string _data;
+ uint8_t *ptr() { return (uint8_t *)_data.begin(); }
+ int32_t siz() { return _data.size(); }
+ Buffer() : _data() {
+ reserve(_data.capacity()-8); // do not cause extra malloc() by default
+ }
+ void reserve(size_t size) {
+ _data.reserve(size+8);
+ _data.resize(size);
+ _data[size+1] = '\0';
+ _data[size+2] = '\0';
+ _data[size+3] = 'd';
+ _data[size+4] = 'e';
+ _data[size+5] = 'a';
+ _data[size+6] = 'd';
+ _data[size+7] = '\0';
+ }
+ void check() {
+ assert(_data[siz()+3] == 'd');
+ assert(_data[siz()+4] == 'e');
+ assert(_data[siz()+5] == 'a');
+ assert(_data[siz()+6] == 'd');
+ }
+ };
+ int utf8ToUtf16(const ConstBufferRef & src) const;
+ virtual ConstBufferRef onConvert(const ConstBufferRef & src) const;
+ mutable Buffer _buffer;
+ mutable std::vector<UChar> _u16Buffer;
+ std::unique_ptr<Collator> _collator;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp b/searchlib/src/vespa/searchlib/uca/ucafunctionnode.cpp
index 2cd4df49c5b..fa29f6f1547 100644
--- a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp
+++ b/searchlib/src/vespa/searchlib/uca/ucafunctionnode.cpp
@@ -1,7 +1,7 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
-#include <vespa/searchlib/expression/ucafunctionnode.h>
-#include <vespa/searchlib/common/converters.h>
+#include <vespa/searchlib/uca/ucafunctionnode.h>
+#include <vespa/searchlib/uca/ucaconverter.h>
namespace search {
namespace expression {
@@ -24,7 +24,7 @@ UcaFunctionNode::UcaFunctionNode(const ExpressionNode::CP & arg, const vespalib:
UnaryFunctionNode(arg),
_locale(locale),
_strength(strength),
- _collator(new common::UcaConverter(locale, strength))
+ _collator(new uca::UcaConverter(locale, strength))
{
}
@@ -104,7 +104,7 @@ Deserializer & UcaFunctionNode::onDeserialize(Deserializer & is)
{
UnaryFunctionNode::onDeserialize(is);
is >> _locale >> _strength;
- _collator.reset(new common::UcaConverter(_locale, _strength));
+ _collator.reset(new uca::UcaConverter(_locale, _strength));
return is;
}
diff --git a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h b/searchlib/src/vespa/searchlib/uca/ucafunctionnode.h
index 78242d9cbd1..78242d9cbd1 100644
--- a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h
+++ b/searchlib/src/vespa/searchlib/uca/ucafunctionnode.h