aboutsummaryrefslogtreecommitdiffstats
path: root/juniper/src/vespa/juniper/appender.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'juniper/src/vespa/juniper/appender.cpp')
-rw-r--r--juniper/src/vespa/juniper/appender.cpp129
1 files changed, 129 insertions, 0 deletions
diff --git a/juniper/src/vespa/juniper/appender.cpp b/juniper/src/vespa/juniper/appender.cpp
new file mode 100644
index 00000000000..4d55f62a27a
--- /dev/null
+++ b/juniper/src/vespa/juniper/appender.cpp
@@ -0,0 +1,129 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "appender.h"
+#include "juniperdebug.h"
+#define _NEED_SUMMARY_CONFIG_IMPL
+#include "SummaryConfig.h"
+
+namespace juniper {
+
+/**
+ * Append a single byte of document text to the output buffer 's'.
+ *
+ * Processing order:
+ *  1. The zero-byte invariant is checked through JD_INVAR (the macro is handed
+ *     'return' and a warning log statement as its actions).
+ *  2. Bytes the summary config classifies as separators are dropped.
+ *  3. Unless white space is preserved, a run of white space is reduced to a
+ *     single blank.
+ *  4. When markup escaping is enabled, the characters < > " & ' are written
+ *     as entities; each entity counts as one character in _char_len.
+ *  5. Any other byte is copied verbatim, and _char_len is incremented only
+ *     when the byte is not an UTF-8 continuation byte.
+ */
+void
+Appender::append(std::vector<char> & s, char c)
+{
+    JD_INVAR(JD_INPUT, c != 0, return,
+             LOG(warning, "Document source contained 0-bytes"));
+
+    // Separator bytes never reach the output.
+    if (_sumconf->separator(c)) {
+        return;
+    }
+
+    // Collapse consecutive white space into one blank.
+    if (!_preserve_white_space) {
+        // Only positive values are passed on to isspace().
+        const bool is_space = (c > 0) && isspace(c);
+        if (is_space && _last_was_space) {
+            return;
+        }
+        _last_was_space = is_space;
+        if (is_space) {
+            c = ' '; // Newlines and tabs are always emitted as a plain blank
+        }
+    }
+
+    // Look up the entity replacing this character, if escaping applies.
+    const char *entity = nullptr;
+    if (_escape_markup) {
+        switch (c) {
+        case '<':  entity = "&lt;";   break;
+        case '>':  entity = "&gt;";   break;
+        case '"':  entity = "&quot;"; break;
+        case '&':  entity = "&amp;";  break;
+        case '\'': entity = "&#39;";  break;
+        default:                      break;
+        }
+    }
+
+    if (entity != nullptr) {
+        for (const char *p = entity; *p != '\0'; ++p) {
+            s.push_back(*p);
+        }
+        // The whole entity stands for one character.
+        _char_len++;
+        return;
+    }
+
+    s.push_back(c);
+    // Bytes of the form 10xxxxxx continue an UTF-8 sequence; every other byte
+    // (top bit clear, or both top bits set) starts a new character.
+    if ((c & 0xC0) != 0x80) {
+        _char_len++;
+    }
+}
+
+/**
+ * Set up an appender from the given summary configuration.
+ *
+ * Markup escaping follows the config's escape_markup flag: off, on, or (for
+ * CF_AUTO) on exactly when the highlight-on, highlight-off or dots string
+ * begins with '<'. White space is preserved only when the corresponding
+ * config flag is CF_ON.
+ */
+Appender::Appender(const SummaryConfig *sumconf)
+    : _sumconf(sumconf),
+      _escape_markup(false),
+      _preserve_white_space(false),
+      _last_was_space(false),
+      _char_len(0)
+{
+    const ConfigFlag escape_mode = _sumconf->escape_markup();
+
+    if (escape_mode == CF_ON) {
+        _escape_markup = true;
+    } else if (escape_mode == CF_AUTO) {
+        // Escape automatically when any of the configured decorations
+        // starts with '<'.
+        _escape_markup = _sumconf->highlight_on()[0] == '<'
+                      || _sumconf->highlight_off()[0] == '<'
+                      || _sumconf->dots()[0] == '<';
+    }
+    // For CF_OFF the member keeps the 'false' it was initialized with.
+
+    _preserve_white_space = (_sumconf->preserve_white_space() == CF_ON);
+}
+
+/**
+ * Append the first 'length' bytes found at 'ds' to 's', feeding them one by
+ * one through the single-character append(). Nothing is appended when
+ * 'length' is zero or negative.
+ */
+void
+Appender::append(std::vector<char>& s, const char* ds, int length) {
+    const char *cursor = ds;
+    for (int remaining = length; remaining > 0; --remaining) {
+        append(s, *cursor++);
+    }
+}
+
+}