aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/common/sortspec.cpp
blob: 04bc87f10007ee0b13fa8073ffa1657c64938261 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "sortspec.h"
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/fastlib/text/normwordfolder.h>
#include <vespa/vespalib/text/utf8.h>
#include <stdexcept>

namespace search::common {

using vespalib::ConstBufferRef;
using vespalib::make_string;

// Identity conversion: the returned buffer reference is a copy of the input reference.
ConstBufferRef
PassThroughConverter::onConvert(const ConstBufferRef & src) const
{
    ConstBufferRef unchanged(src);
    return unchanged;
}

LowercaseConverter::LowercaseConverter() noexcept
    : _buffer()
{
}

// Decodes src as UTF-8, folds every code point with
// Fast_NormalizeWordFolder::ToFold and re-encodes the result as UTF-8 into
// the internal buffer. The returned reference points into that buffer, which
// is cleared and rewritten at the start of every call.
ConstBufferRef
LowercaseConverter::onConvert(const ConstBufferRef & src) const
{
    _buffer.clear();
    vespalib::stringref utf8(static_cast<const char *>(src.data()), src.size());
    vespalib::Utf8Reader reader(utf8);
    vespalib::Utf8Writer writer(_buffer);
    for (; reader.hasMore(); ) {
        // 0xFFFD is the Unicode replacement character; presumably what the
        // reader yields for input it cannot decode.
        const ucs4_t decoded = reader.getChar(0xFFFD);
        const ucs4_t folded = Fast_NormalizeWordFolder::ToFold(decoded);
        writer.putChar(folded);
    }
    return ConstBufferRef(_buffer.begin(), _buffer.size());
}

// Describes one sort term: the field name, the sort direction and an optional
// converter (may be an empty pointer) whose ownership is moved into this object.
SortInfo::SortInfo(vespalib::stringref field, bool ascending, BlobConverter::SP converter) noexcept
    : _field(field),
      _ascending(ascending),
      _converter(std::move(converter))
{
}
// Out-of-line defaulted destructor.
SortInfo::~SortInfo() = default;

/**
 * Parses a textual sort specification into a sequence of SortInfo entries.
 *
 * The spec is scanned for terms introduced by '+' (ascending) or '-'
 * (descending); characters in front of such a marker are skipped. A term
 * extends to the next space or to the end of the spec and has one of these forms:
 *   - attr                        plain field, no converter
 *   - uca(attr,locale)            converter from ucaFactory, empty strength
 *   - uca(attr,locale,strength)   converter from ucaFactory
 *   - lowercase(attr)             LowercaseConverter
 *
 * Function names must match exactly; an empty name, an abbreviation or a
 * longer name that merely starts with a known one is rejected.
 *
 * @param spec       the sort specification; a copy is kept in _spec.
 * @param ucaFactory factory used to create the converter for uca(...) terms.
 * @throws std::runtime_error if a function name is unknown or an argument
 *         list is malformed.
 */
SortSpec::SortSpec(const vespalib::string & spec, const ConverterFactory & ucaFactory) :
    _spec(spec)
{
    for (const char *pt(spec.c_str()), *mt(spec.c_str() + spec.size()); pt < mt;) {
        // Skip ahead to the next direction marker.
        for (; pt < mt && *pt != '+' && *pt != '-'; pt++);
        if (pt != mt) {
            bool ascending = (*pt++ == '+');
            // The term runs up to the next space (or the end of the spec).
            const char *vectorName = pt;
            for (;pt < mt && *pt != ' '; pt++);
            vespalib::string funcSpec(vectorName, pt - vectorName);
            const char * func = funcSpec.c_str();
            const char *p = func;
            const char *e = func+funcSpec.size();
            for(; (p < e) && (*p != '('); p++);
            // When p == e the checks below read the terminating NUL of
            // funcSpec.c_str(), which is in bounds and never matches.
            if (*p == '(') {
                // Compare the whole function name, not just a common prefix:
                // strncmp() with a clamped (possibly zero) length would accept
                // "", "u", "low" or "lowercaseX" as valid function names.
                const auto funcNameLen = p - func;
                const bool isUca = (funcNameLen == 3) && (strncmp(func, "uca", 3) == 0);
                const bool isLowercase = (funcNameLen == 9) && (strncmp(func, "lowercase", 9) == 0);
                if (isUca) {
                    p++;
                    const char * attrName = p;
                    for(; (p < e) && (*p != ','); p++);
                    if (*p == ',') {
                        vespalib::string attr(attrName, p-attrName);
                        p++;
                        const char *localeName = p;
                        for(; (p < e) && (*p != ')') && (*p != ','); p++);
                        if (*p == ',') {
                            // Three-argument form: uca(attr,locale,strength)
                            vespalib::string locale(localeName, p-localeName);
                            p++;
                            const char *strengthName = p;
                            for(; (p < e) && (*p != ')'); p++);
                            if (*p == ')') {
                                vespalib::string strength(strengthName, p - strengthName);
                                emplace_back(attr, ascending, ucaFactory.create(locale, strength));
                            } else {
                                throw std::runtime_error(make_string("Missing ')' at %s attr=%s locale=%s strength=%s", p, attr.c_str(), localeName, strengthName));
                            }
                        } else if (*p == ')') {
                            // Two-argument form: uca(attr,locale)
                            vespalib::string locale(localeName, p-localeName);
                            emplace_back(attr, ascending, ucaFactory.create(locale, ""));
                        } else {
                            throw std::runtime_error(make_string("Missing ')' or ',' at %s attr=%s locale=%s", p, attr.c_str(), localeName));
                        }
                    } else {
                        throw std::runtime_error(make_string("Missing ',' at %s", p));
                    }
                } else if (isLowercase) {
                    p++;
                    const char * attrName = p;
                    for(; (p < e) && (*p != ')'); p++);
                    if (*p == ')') {
                        vespalib::string attr(attrName, p-attrName);
                        emplace_back(attr, ascending, std::make_shared<LowercaseConverter>());
                    } else {
                        throw std::runtime_error("Missing ')'");
                    }
                } else {
                    throw std::runtime_error("Unknown func " + vespalib::string(func, p-func));
                }
            } else {
                // No '(' in the term: it is a plain field name without a converter.
                emplace_back(funcSpec, ascending, std::shared_ptr<search::common::BlobConverter>());
            }
        }
    }
}

// Out-of-line defaulted destructor.
SortSpec::~SortSpec() = default;

}