aboutsummaryrefslogtreecommitdiffstats
path: root/searchsummary/src/vespa/juniper/appender.cpp
blob: 4d55f62a27a6b6b3ee9e169a5b4a93140061e77d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "appender.h"
#include "juniperdebug.h"
#define _NEED_SUMMARY_CONFIG_IMPL
#include "SummaryConfig.h"

namespace juniper {

/**
 * Append one byte of document source to the output buffer.
 *
 * Processing order:
 *  1. Bytes that the summary configuration reports as separators are dropped.
 *  2. Unless white space is preserved, every byte classified by isspace() is
 *     emitted as a plain ' ' and a run of such bytes yields a single ' '.
 *  3. When markup escaping is enabled, the characters < > " & ' are written
 *     as the entities &lt; &gt; &quot; &amp; &#39; respectively.
 *  4. Anything else is copied through unchanged.
 *
 * _char_len counts characters rather than bytes: an emitted entity counts as
 * one, and a copied byte counts unless it is a UTF-8 continuation byte.
 *
 * @param s  output buffer that receives the (possibly escaped) byte
 * @param c  the input byte to process
 */
void
Appender::append(std::vector<char> & s, char c)
{
    // NOTE(review): JD_INVAR is defined outside this file; from its arguments
    // it presumably rejects 0-bytes by logging and returning - confirm in
    // juniperdebug.h.
    JD_INVAR(JD_INPUT, c != 0, return,\
             LOG(warning, "Document source contained 0-bytes"));

    // Separators never reach the output.
    if (_sumconf->separator(c)) {
        return;
    }

    // Collapse runs of white space into one blank.
    if (!_preserve_white_space) {
        // Only non-negative values are handed to isspace().
        const bool is_blank = (c > 0) && isspace(c);
        if (is_blank && _last_was_space) {
            return;
        }
        _last_was_space = is_blank;
        if (is_blank) {
            c = ' '; // newline, tab etc. are never written as such
        }
    }

    // Look up the entity replacing this character, if escaping applies.
    const char *entity = nullptr;
    if (_escape_markup) {
        switch (c) {
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        case '"':  entity = "&quot;"; break;
        case '&':  entity = "&amp;";  break;
        case '\'': entity = "&#39;";  break;
        default:   break;
        }
    }

    if (entity != nullptr) {
        for (const char *p = entity; *p != '\0'; ++p) {
            s.push_back(*p);
        }
        // The whole entity represents a single character.
        _char_len++;
        return;
    }

    s.push_back(c);
    // A UTF-8 continuation byte has the top bit set and the next bit clear;
    // every other byte starts a new character and is counted.
    const bool is_continuation = (c & 0x80) && !(c & 0x40);
    if (!is_continuation) {
        _char_len++;
    }
}

/**
 * Set up an appender from a summary configuration.
 *
 * Markup escaping follows the configuration's escape_markup() flag:
 * CF_ON enables it, CF_OFF disables it, and CF_AUTO enables it when the
 * highlight-on, highlight-off or dots string starts with '<'. Any other
 * value leaves escaping disabled.
 *
 * White space is preserved only when preserve_white_space() is CF_ON.
 *
 * @param sumconf  summary configuration; stored as a pointer and
 *                 dereferenced here
 */
Appender::Appender(const SummaryConfig *sumconf)
    : _sumconf(sumconf),
      _escape_markup(false),
      _preserve_white_space(false),
      _last_was_space(false),
      _char_len(0)
{
    const ConfigFlag escape_mode = _sumconf->escape_markup();

    if (escape_mode == CF_ON) {
        _escape_markup = true;
    } else if (escape_mode == CF_AUTO) {
        // Escape when any of the configured decoration strings begins with '<'.
        _escape_markup = (_sumconf->highlight_on()[0] == '<' ||
                          _sumconf->highlight_off()[0] == '<' ||
                          _sumconf->dots()[0] == '<');
    }
    // CF_OFF and any other value: keep the initial 'false'.

    _preserve_white_space = (_sumconf->preserve_white_space() == CF_ON);
}

/**
 * Append a run of bytes by feeding each one, in order, to the
 * single-character append().
 *
 * @param s       output buffer handed on to the single-character append()
 * @param ds      first byte of the input run
 * @param length  number of bytes to read from ds; nothing is read when
 *                this is zero or negative
 */
void
Appender::append(std::vector<char>& s, const char* ds, int length) {
    const char* cursor = ds;
    for (int remaining = length; remaining > 0; --remaining) {
        append(s, *cursor++);
    }
}

}