summaryrefslogtreecommitdiffstats
path: root/searchsummary
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
committer Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
commit 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchsummary
Publish
Diffstat (limited to 'searchsummary')
-rw-r--r--searchsummary/.gitignore4
-rw-r--r--searchsummary/CMakeLists.txt29
-rw-r--r--searchsummary/OWNERS2
-rw-r--r--searchsummary/pom.xml44
-rw-r--r--searchsummary/src/.gitignore4
-rw-r--r--searchsummary/src/testlist.txt4
-rw-r--r--searchsummary/src/tests/docsumformat/.gitignore21
-rw-r--r--searchsummary/src/tests/docsumformat/CMakeLists.txt8
-rwxr-xr-xsearchsummary/src/tests/docsumformat/docsum-index.sh16
-rw-r--r--searchsummary/src/tests/docsumformat/docsum-pack.cpp631
-rw-r--r--searchsummary/src/tests/docsumformat/docsum-parse.cpp201
-rwxr-xr-xsearchsummary/src/tests/docsumformat/dotest.sh13
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/.gitignore2
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.117
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.214
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.35
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.417
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.53
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.65
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.711
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.87
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/OK.correct.913
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/README24
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.114
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.214
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.33
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.417
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.54
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.62
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.711
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.87
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/correct.913
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.10
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.214
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.314
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.414
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.514
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.613
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.714
-rw-r--r--searchsummary/src/tests/docsumformat/parsetest/incorrect.82
-rw-r--r--searchsummary/src/tests/docsummary/.gitignore4
-rw-r--r--searchsummary/src/tests/docsummary/CMakeLists.txt8
-rw-r--r--searchsummary/src/tests/docsummary/positionsdfw_test.cpp142
-rw-r--r--searchsummary/src/tests/docsummary/slime_summary/.gitignore1
-rw-r--r--searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt8
-rw-r--r--searchsummary/src/tests/docsummary/slime_summary/FILES1
-rw-r--r--searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp125
-rw-r--r--searchsummary/src/tests/extractkeywords/.gitignore7
-rw-r--r--searchsummary/src/tests/extractkeywords/CMakeLists.txt8
-rw-r--r--searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp295
-rw-r--r--searchsummary/src/tests/extractkeywords/extractkeywordstest.h34
-rwxr-xr-xsearchsummary/src/tests/extractkeywords/runtests.sh29
-rw-r--r--searchsummary/src/tests/extractkeywords/testowner.ATS1
-rw-r--r--searchsummary/src/vespa/searchsummary/.gitignore3
-rw-r--r--searchsummary/src/vespa/searchsummary/CMakeLists.txt8
-rw-r--r--searchsummary/src/vespa/searchsummary/config/.gitignore5
-rw-r--r--searchsummary/src/vespa/searchsummary/config/CMakeLists.txt7
-rw-r--r--searchsummary/src/vespa/searchsummary/config/juniperrc.def78
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/.gitignore6
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt26
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp435
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/attributedfw.h27
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp124
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.h41
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp286
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.h116
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp108
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumformat.h67
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp50
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h86
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h57
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumstorevalue.h63
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp517
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h123
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp494
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp195
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h36
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp96
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h91
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/idocsumenvironment.h24
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/itokenizer.h69
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h74
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.cpp112
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.h70
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp233
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h164
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp337
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h63
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp113
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h39
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp114
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultclass.h291
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp246
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h301
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp266
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h271
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp160
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h48
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.cpp94
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.h42
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/tokenizer.cpp112
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/tokenizer.h47
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp819
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/urlresult.h90
-rw-r--r--searchsummary/testrun/.gitignore9
105 files changed, 9281 insertions, 0 deletions
diff --git a/searchsummary/.gitignore b/searchsummary/.gitignore
new file mode 100644
index 00000000000..be0452bed21
--- /dev/null
+++ b/searchsummary/.gitignore
@@ -0,0 +1,4 @@
+/target
+/pom.xml.build
+Makefile
+Testing
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
new file mode 100644
index 00000000000..f3b57ec54f5
--- /dev/null
+++ b/searchsummary/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Module definition for searchsummary. The three sections below list the
+# modules it depends on (DEPENDS), the library source directories (LIBS) and
+# the test directories (TESTS).
+vespa_define_module(
+ DEPENDS
+ fastos
+ vespalog
+ vespalib
+ staging_vespalib
+ fnet
+ configdefinitions
+ fastlib_fast
+ document
+ config_cloudconfig
+ searchcommon
+ persistencetypes
+ metrics
+ searchlib
+ juniper
+
+ LIBS
+ src/vespa/searchsummary
+ src/vespa/searchsummary/config
+ src/vespa/searchsummary/docsummary
+
+ TESTS
+ src/tests/docsumformat
+ src/tests/docsummary
+ src/tests/docsummary/slime_summary
+ src/tests/extractkeywords
+)
diff --git a/searchsummary/OWNERS b/searchsummary/OWNERS
new file mode 100644
index 00000000000..9673ef97e16
--- /dev/null
+++ b/searchsummary/OWNERS
@@ -0,0 +1,2 @@
+geirst
+balder
diff --git a/searchsummary/pom.xml b/searchsummary/pom.xml
new file mode 100644
index 00000000000..0676ac78527
--- /dev/null
+++ b/searchsummary/pom.xml
@@ -0,0 +1,44 @@
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>parent</artifactId>
+ <version>6-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+ <artifactId>searchsummary</artifactId>
+ <version>6-SNAPSHOT</version>
+ <packaging>jar</packaging>
+ <name>${project.artifactId}</name>
+ <dependencies>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>config-lib</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>config-class-plugin</artifactId>
+ <version>${project.version}</version>
+ <configuration>
+ <defFilesDirectories>src/vespa/searchsummary/config/</defFilesDirectories>
+ </configuration>
+ <executions>
+ <execution>
+ <id>config-gen</id>
+ <goals>
+ <goal>config-gen</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/searchsummary/src/.gitignore b/searchsummary/src/.gitignore
new file mode 100644
index 00000000000..47011ff3508
--- /dev/null
+++ b/searchsummary/src/.gitignore
@@ -0,0 +1,4 @@
+/Makefile.ini
+/config_command.sh
+/project.dsw
+/searchsummary.mak
diff --git a/searchsummary/src/testlist.txt b/searchsummary/src/testlist.txt
new file mode 100644
index 00000000000..62ea27ae736
--- /dev/null
+++ b/searchsummary/src/testlist.txt
@@ -0,0 +1,4 @@
+tests/docsumformat
+tests/docsummary
+tests/docsummary/slime_summary
+tests/extractkeywords
diff --git a/searchsummary/src/tests/docsumformat/.gitignore b/searchsummary/src/tests/docsumformat/.gitignore
new file mode 100644
index 00000000000..2c841cbd43d
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/.gitignore
@@ -0,0 +1,21 @@
+*.cfg
+*.core
+*.ilk
+*.out
+*.pdb
+*.pid
+.depend
+Makefile
+core
+core.*
+datapart.*
+docsum-index
+docsum-pack
+docsum-parse
+index.cf
+merged
+meta-info.txt
+schema.txt
+summary.cf
+version.txt
+searchsummary_docsum-pack_app
diff --git a/searchsummary/src/tests/docsumformat/CMakeLists.txt b/searchsummary/src/tests/docsumformat/CMakeLists.txt
new file mode 100644
index 00000000000..ac8d2151792
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Builds the docsum-pack test program from docsum-pack.cpp with a dependency
+# on searchsummary, then registers that same executable as a test.
+vespa_add_executable(searchsummary_docsum-pack_app
+ SOURCES
+ docsum-pack.cpp
+ DEPENDS
+ searchsummary
+)
+vespa_add_test(NAME searchsummary_docsum-pack_app COMMAND searchsummary_docsum-pack_app)
diff --git a/searchsummary/src/tests/docsumformat/docsum-index.sh b/searchsummary/src/tests/docsumformat/docsum-index.sh
new file mode 100755
index 00000000000..0d313191685
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/docsum-index.sh
@@ -0,0 +1,16 @@
+#!/bin/sh -e
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+findex=../../../bin/findex
+
+echo "CLEAN"
+rm -f index.cf
+rm -f summary.cf
+rm -rf merged
+rm -rf datapart.*
+
+echo "DOCSUM-INDEX"
+./docsum-index
+
+echo "AUTOINDEX"
+$findex autoindex
diff --git a/searchsummary/src/tests/docsumformat/docsum-pack.cpp b/searchsummary/src/tests/docsumformat/docsum-pack.cpp
new file mode 100644
index 00000000000..3f1b088bd12
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/docsum-pack.cpp
@@ -0,0 +1,631 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("docsum-pack");
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+#include <vespa/searchsummary/docsummary/resultpacker.h>
+
+using namespace search::docsummary;
+
+
+// Needed to resolve an external symbol referenced from httpd.h on AIX.
+// The body is intentionally empty; only the definition itself is required.
+void FastS_block_usr2() {}
+
+
+// Test application for docsum packing and unpacking.
+//
+// Owns a ResultConfig and a ResultPacker initialized with a pointer to that
+// config, plus a running test-case counter and an overall pass/fail flag
+// that the individual checks update through ReportTestResult()/RTR().
+class MyApp : public FastOS_Application
+{
+private:
+ // Overall verdict; ReportTestResult() clears it when a check fails.
+ bool _rc;
+ // Number of checks reported so far.
+ uint32_t _cnt;
+ search::docsummary::ResultConfig _config;
+ // Initialized with &_config in the constructor; keep it declared after _config.
+ search::docsummary::ResultPacker _packer;
+
+public:
+ MyApp()
+ : _rc(false),
+ _cnt(0u),
+ _config(),
+ _packer(&_config)
+ {
+ }
+
+ // Log the outcome of one check; 'line' is the source line of the check.
+ void ReportTestResult(uint32_t line, bool rc);
+ // Shorthand for ReportTestResult() that also hands 'rc' back to the caller.
+ bool RTR(uint32_t line, bool rc)
+ { ReportTestResult(line, rc); return rc; }
+
+ // Compare runtime info (but ignore the result class id).
+ bool Equal(search::docsummary::ResEntry *a, search::docsummary::ResEntry *b);
+ bool Equal(search::docsummary::GeneralResult *a, search::docsummary::GeneralResult *b);
+
+ // Check that 'field' is found at index 'idx' in the result class of 'gres'.
+ void TestFieldIndex(uint32_t line, search::docsummary::GeneralResult *gres,
+ const char *field, int idx);
+
+ // The Test*Value helpers check that the entry named 'field' in 'gres' has
+ // the expected type and value, and report the outcome for 'line'.
+ void TestIntValue(uint32_t line, search::docsummary::GeneralResult *gres,
+ const char *field, uint32_t value);
+
+ void TestDoubleValue(uint32_t line, search::docsummary::GeneralResult *gres,
+ const char *field, double value);
+
+ void TestInt64Value(uint32_t line, search::docsummary::GeneralResult *gres,
+ const char *field, uint64_t value);
+
+ void TestStringValue(uint32_t line, search::docsummary::GeneralResult *gres,
+ const char *field, const char *value);
+
+ void TestDataValue(uint32_t line, search::docsummary::GeneralResult *gres,
+ const char *field, const char *value);
+
+ // Individual test scenarios, run in this order by Main().
+ void TestBasic();
+ void TestFailLong();
+ void TestFailShort();
+ void TestFailOrder();
+ void TestCompress();
+ void TestCompat();
+ void TestBasicInplace();
+ void TestCompressInplace();
+
+ // Registers the result classes, runs all scenarios, returns 0 on success.
+ int Main();
+};
+
+
+// Records the outcome of a single check.
+//
+// Bumps the running test-case counter, logs the outcome and, on failure,
+// clears the overall verdict (_rc).
+//
+// line: source line of the check, shown in the failure message.
+// rc:   true when the check passed.
+void
+MyApp::ReportTestResult(uint32_t line, bool rc)
+{
+    _cnt++;
+
+    // _cnt and line are uint32_t, so they are printed with %u; %d expects a
+    // signed int and does not match the argument type.
+    if (rc) {
+        LOG(info, "Test case %u: SUCCESS", _cnt);
+    } else {
+        LOG(error, "Test case %u: FAIL (see %s:%u)", _cnt, __FILE__, line);
+        _rc = false;
+    }
+}
+
+
+// Compares the runtime contents of two result entries.
+//
+// Two entries match when their _type and _intval fields agree and, for any
+// type other than RES_INT, the first _intval bytes behind _pt are identical.
+bool
+MyApp::Equal(search::docsummary::ResEntry *a, search::docsummary::ResEntry *b)
+{
+    const bool sameType = (a->_type == b->_type);
+    if (!sameType) {
+        return false;
+    }
+
+    const bool sameIntval = (a->_intval == b->_intval);
+    if (!sameIntval) {
+        return false;
+    }
+
+    // Plain integers carry no payload behind _pt; nothing more to compare.
+    if (a->_type == RES_INT) {
+        return true;
+    }
+
+    return memcmp(a->_pt, b->_pt, a->_intval) == 0;
+}
+
+
+// Compares two unpacked results entry by entry.
+//
+// The results match when their classes have the same number of entries and,
+// for every index, the entry contents are equal and the bind names of the
+// corresponding class entries are equal. The class id is not compared.
+bool
+MyApp::Equal(search::docsummary::GeneralResult *a, search::docsummary::GeneralResult *b)
+{
+    const uint32_t entryCount = a->GetClass()->GetNumEntries();
+    if (entryCount != b->GetClass()->GetNumEntries()) {
+        return false;
+    }
+
+    uint32_t idx = 0;
+    while (idx < entryCount) {
+        // Values first, then names, in the same order as before.
+        if (!Equal(a->GetEntry(idx), b->GetEntry(idx))) {
+            return false;
+        }
+        if (a->GetClass()->GetEntry(idx)->_bindname != b->GetClass()->GetEntry(idx)->_bindname) {
+            return false;
+        }
+        idx++;
+    }
+
+    return true;
+}
+
+
+// Reports whether 'field' resolves to index 'idx' in the result class of
+// 'gres'. A NULL 'gres' is reported as a failure.
+void
+MyApp::TestFieldIndex(uint32_t line, search::docsummary::GeneralResult *gres,
+                      const char *field, int idx)
+{
+    bool indexMatches = false;
+
+    if (gres != NULL) {
+        indexMatches = (gres->GetClass()->GetIndexFromName(field) == idx);
+    }
+
+    RTR(line, indexMatches);
+}
+
+
+// Reports whether the entry named 'field' in 'gres' is of type RES_INT and
+// holds 'value'. A NULL 'gres' or a missing entry is reported as a failure.
+void
+MyApp::TestIntValue(uint32_t line, search::docsummary::GeneralResult *gres,
+                    const char *field, uint32_t value)
+{
+    search::docsummary::ResEntry *entry = NULL;
+    if (gres != NULL) {
+        entry = gres->GetEntry(field);
+    }
+
+    bool matches = false;
+    if (entry != NULL && entry->_type == RES_INT) {
+        matches = (entry->_intval == value);
+    }
+
+    RTR(line, matches);
+}
+
+
+// Reports whether the entry named 'field' in 'gres' is of type RES_DOUBLE
+// and compares exactly equal to 'value'. A NULL 'gres' or a missing entry is
+// reported as a failure.
+void
+MyApp::TestDoubleValue(uint32_t line, search::docsummary::GeneralResult *gres,
+                       const char *field, double value)
+{
+    search::docsummary::ResEntry *entry = NULL;
+    if (gres != NULL) {
+        entry = gres->GetEntry(field);
+    }
+
+    bool matches = false;
+    if (entry != NULL && entry->_type == RES_DOUBLE) {
+        matches = (entry->_doubleval == value);
+    }
+
+    RTR(line, matches);
+}
+
+
+// Reports whether the entry named 'field' in 'gres' is of type RES_INT64 and
+// holds 'value'. A NULL 'gres' or a missing entry is reported as a failure.
+void
+MyApp::TestInt64Value(uint32_t line, search::docsummary::GeneralResult *gres,
+                      const char *field, uint64_t value)
+{
+    search::docsummary::ResEntry *entry = NULL;
+    if (gres != NULL) {
+        entry = gres->GetEntry(field);
+    }
+
+    bool matches = false;
+    if (entry != NULL && entry->_type == RES_INT64) {
+        matches = (entry->_int64val == value);
+    }
+
+    RTR(line, matches);
+}
+
+
+// Reports whether the entry named 'field' in 'gres' is of type RES_STRING and
+// holds exactly the characters of 'value' (same length, same bytes).
+//
+// When an entry was found but does not match, the actual and expected
+// strings are logged as a warning before the failure is reported.
+void
+MyApp::TestStringValue(uint32_t line, search::docsummary::GeneralResult *gres,
+                       const char *field, const char *value)
+{
+    search::docsummary::ResEntry *entry = NULL;
+    if (gres != NULL) {
+        entry = gres->GetEntry(field);
+    }
+
+    bool matches = false;
+    if (entry != NULL && entry->_type == RES_STRING) {
+        matches = (entry->_stringlen == strlen(value))
+                  && (strncmp(entry->_stringval, value, entry->_stringlen) == 0);
+    }
+
+    if (entry != NULL && !matches) {
+        LOG(warning,
+            "string value '%.*s' != '%s'",
+            (int) entry->_stringlen,
+            entry->_stringval, value);
+    }
+
+    RTR(line, matches);
+}
+
+
+// Reports whether the entry named 'field' in 'gres' is of type RES_DATA and
+// holds the bytes of 'value'. The expected value is given as a C string, so
+// its length is taken with strlen() and the bytes are compared with strncmp().
+void
+MyApp::TestDataValue(uint32_t line, search::docsummary::GeneralResult *gres,
+                     const char *field, const char *value)
+{
+    search::docsummary::ResEntry *entry = NULL;
+    if (gres != NULL) {
+        entry = gres->GetEntry(field);
+    }
+
+    bool matches = false;
+    if (entry != NULL && entry->_type == RES_DATA) {
+        matches = (entry->_datalen == strlen(value))
+                  && (strncmp(entry->_dataval, value, entry->_datalen) == 0);
+    }
+
+    RTR(line, matches);
+}
+
+
+// Packs one value of every field type into a class-0 docsum, unpacks the
+// blob through ResultConfig::Unpack() and checks that every field comes back
+// with the expected type and value.
+void
+MyApp::TestBasic()
+{
+    const char *buf;
+    uint32_t    buflen;
+
+    search::docsummary::urlresult     *res;
+    search::docsummary::GeneralResult *gres;
+
+    uint32_t    intval    = 4;
+    uint16_t    shortval  = 2;
+    uint8_t     byteval   = 1;
+    float       floatval  = 4.5;
+    double      doubleval = 8.75;
+    uint64_t    int64val  = 8;
+    const char *strval    = "This is a string";
+    const char *datval    = "This is data";
+    const char *lstrval   = "This is a long string";
+    const char *ldatval   = "This is long data";
+
+    // Fields must be added in the order the class declares them (see Main()).
+    RTR(__LINE__, _packer.Init(0));
+    RTR(__LINE__, _packer.AddInteger(intval));
+    RTR(__LINE__, _packer.AddShort(shortval));
+    RTR(__LINE__, _packer.AddByte(byteval));
+    RTR(__LINE__, _packer.AddFloat(floatval));
+    RTR(__LINE__, _packer.AddDouble(doubleval));
+    RTR(__LINE__, _packer.AddInt64(int64val));
+    RTR(__LINE__, _packer.AddString(strval, strlen(strval)));
+    RTR(__LINE__, _packer.AddData(datval, strlen(datval)));
+    RTR(__LINE__, _packer.AddLongString(lstrval, strlen(lstrval)));
+    RTR(__LINE__, _packer.AddLongData(ldatval, strlen(ldatval)));
+    RTR(__LINE__, _packer.GetDocsumBlob(&buf, &buflen));
+
+    res = _config.Unpack(0, 0, 0, buf, buflen);
+    // Guard against a NULL result so that a failed unpack is reported by the
+    // checks below instead of crashing on res->IsGeneral().
+    // NOTE(review): presumably Unpack() returns NULL on failure - confirm.
+    gres = (res != NULL && res->IsGeneral())
+           ? (search::docsummary::GeneralResult *) res
+           : NULL;
+
+    RTR(__LINE__, gres != NULL);
+    TestIntValue   (__LINE__, gres, "integer",    4);
+    TestIntValue   (__LINE__, gres, "short",      2);
+    TestIntValue   (__LINE__, gres, "byte",       1);
+    TestDoubleValue(__LINE__, gres, "float",      floatval);
+    TestDoubleValue(__LINE__, gres, "double",     doubleval);
+    TestInt64Value (__LINE__, gres, "int64",      int64val);
+    TestStringValue(__LINE__, gres, "string",     strval);
+    TestDataValue  (__LINE__, gres, "data",       datval);
+    TestStringValue(__LINE__, gres, "longstring", lstrval);
+    TestDataValue  (__LINE__, gres, "longdata",   ldatval);
+    RTR(__LINE__, (gres != NULL &&
+                   gres->GetClass()->GetNumEntries() == 10));
+    RTR(__LINE__, (gres != NULL &&
+                   gres->GetClass()->GetClassID() == 0));
+    delete res;
+}
+
+
+// Negative test: adds all ten fields of class 0 and then one field too many.
+// The extra AddByte() and the following GetDocsumBlob() are both expected to
+// fail.
+void
+MyApp::TestFailLong()
+{
+ const char *buf;
+ uint32_t buflen;
+
+ uint32_t intval = 4;
+ uint16_t shortval = 2;
+ uint8_t byteval = 1;
+ float floatval = 4.5;
+ double doubleval = 8.75;
+ uint64_t int64val = 8;
+ const char *strval = "This is a string";
+ const char *datval = "This is data";
+ const char *lstrval = "This is a long string";
+ const char *ldatval = "This is long data";
+
+ RTR(__LINE__, _packer.Init(0));
+ RTR(__LINE__, _packer.AddInteger(intval));
+ RTR(__LINE__, _packer.AddShort(shortval));
+ RTR(__LINE__, _packer.AddByte(byteval));
+ RTR(__LINE__, _packer.AddFloat(floatval));
+ RTR(__LINE__, _packer.AddDouble(doubleval));
+ RTR(__LINE__, _packer.AddInt64(int64val));
+ RTR(__LINE__, _packer.AddString(strval, strlen(strval)));
+ RTR(__LINE__, _packer.AddData(datval, strlen(datval)));
+ RTR(__LINE__, _packer.AddLongString(lstrval, strlen(lstrval)));
+ RTR(__LINE__, _packer.AddLongData(ldatval, strlen(ldatval)));
+ // One field more than the class declares: both calls must report failure.
+ RTR(__LINE__, !_packer.AddByte(byteval));
+ RTR(__LINE__, !_packer.GetDocsumBlob(&buf, &buflen));
+}
+
+
+// Negative test: adds only the first nine fields of class 0 (the final
+// "longdata" field is left out) and expects GetDocsumBlob() to fail.
+void
+MyApp::TestFailShort()
+{
+ const char *buf;
+ uint32_t buflen;
+
+ uint32_t intval = 4;
+ uint16_t shortval = 2;
+ uint8_t byteval = 1;
+ float floatval = 4.5;
+ double doubleval = 8.75;
+ uint64_t int64val = 8;
+ const char *strval = "This is a string";
+ const char *datval = "This is data";
+ const char *lstrval = "This is a long string";
+
+ RTR(__LINE__, _packer.Init(0));
+ RTR(__LINE__, _packer.AddInteger(intval));
+ RTR(__LINE__, _packer.AddShort(shortval));
+ RTR(__LINE__, _packer.AddByte(byteval));
+ RTR(__LINE__, _packer.AddFloat(floatval));
+ RTR(__LINE__, _packer.AddDouble(doubleval));
+ RTR(__LINE__, _packer.AddInt64(int64val));
+ RTR(__LINE__, _packer.AddString(strval, strlen(strval)));
+ RTR(__LINE__, _packer.AddData(datval, strlen(datval)));
+ RTR(__LINE__, _packer.AddLongString(lstrval, strlen(lstrval)));
+ // The blob is requested one field early, so this must report failure.
+ RTR(__LINE__, !_packer.GetDocsumBlob(&buf, &buflen));
+}
+
+
+// Negative test: after the first two fields of class 0, a string is added
+// where the class declares a byte. That call and every later call, including
+// GetDocsumBlob(), are expected to fail.
+void
+MyApp::TestFailOrder()
+{
+ const char *buf;
+ uint32_t buflen;
+
+ uint32_t intval = 4;
+ uint16_t shortval = 2;
+ uint8_t byteval = 1;
+ float floatval = 4.5;
+ double doubleval = 8.75;
+ uint64_t int64val = 8;
+ const char *strval = "This is a string";
+ const char *datval = "This is data";
+ const char *lstrval = "This is a long string";
+ const char *ldatval = "This is long data";
+
+ RTR(__LINE__, _packer.Init(0));
+ RTR(__LINE__, _packer.AddInteger(intval));
+ RTR(__LINE__, _packer.AddShort(shortval));
+ // Wrong field type for the third slot; everything from here must fail.
+ RTR(__LINE__, !_packer.AddString(strval, strlen(strval)));
+ RTR(__LINE__, !_packer.AddByte(byteval));
+ RTR(__LINE__, !_packer.AddFloat(floatval));
+ RTR(__LINE__, !_packer.AddDouble(doubleval));
+ RTR(__LINE__, !_packer.AddInt64(int64val));
+ RTR(__LINE__, !_packer.AddData(datval, strlen(datval)));
+ RTR(__LINE__, !_packer.AddLongString(lstrval, strlen(lstrval)));
+ RTR(__LINE__, !_packer.AddLongData(ldatval, strlen(ldatval)));
+ RTR(__LINE__, !_packer.GetDocsumBlob(&buf, &buflen));
+}
+
+
+// Packs a long string and a long data field into a class-2 docsum, unpacks
+// the blob through ResultConfig::Unpack() and checks both values together
+// with the entry count and class id.
+void
+MyApp::TestCompress()
+{
+    const char *buf;
+    uint32_t    buflen;
+
+    search::docsummary::urlresult     *res;
+    search::docsummary::GeneralResult *gres;
+
+    const char *lstrval = "string string string";
+    const char *ldatval = "data data data";
+
+    RTR(__LINE__, _packer.Init(2));
+    RTR(__LINE__, _packer.AddLongString(lstrval, strlen(lstrval)));
+    RTR(__LINE__, _packer.AddLongData(ldatval, strlen(ldatval)));
+    RTR(__LINE__, _packer.GetDocsumBlob(&buf, &buflen));
+
+    res = _config.Unpack(0, 0, 0, buf, buflen);
+    // Guard against a NULL result so that a failed unpack is reported by the
+    // checks below instead of crashing on res->IsGeneral().
+    // NOTE(review): presumably Unpack() returns NULL on failure - confirm.
+    gres = (res != NULL && res->IsGeneral())
+           ? (search::docsummary::GeneralResult *) res
+           : NULL;
+
+    RTR(__LINE__, gres != NULL);
+    TestStringValue(__LINE__, gres, "text", lstrval);
+    TestDataValue  (__LINE__, gres, "data", ldatval);
+    RTR(__LINE__, (gres != NULL &&
+                   gres->GetClass()->GetNumEntries() == 2));
+    RTR(__LINE__, (gres != NULL &&
+                   gres->GetClass()->GetClassID() == 2));
+    delete res;
+}
+
+
+// Packs the same two values once as class 1 (string/data fields) and once as
+// class 2 (long string/long data fields), unpacks both blobs and checks that
+// the two results expose the same fields, indexes and values and compare
+// equal through Equal().
+void
+MyApp::TestCompat()
+{
+    const char *buf;
+    uint32_t    buflen;
+
+    search::docsummary::urlresult     *res1;
+    search::docsummary::GeneralResult *gres1;
+
+    search::docsummary::urlresult     *res2;
+    search::docsummary::GeneralResult *gres2;
+
+    const char *strval = "string string string string";
+    const char *datval = "data data data data";
+
+    RTR(__LINE__, _packer.Init(1));
+    RTR(__LINE__, _packer.AddData(strval, strlen(strval)));
+    RTR(__LINE__, _packer.AddString(datval, strlen(datval)));
+    RTR(__LINE__, _packer.GetDocsumBlob(&buf, &buflen));
+    res1 = _config.Unpack(0, 0, 0, buf, buflen);
+    // Guard against a NULL result so that a failed unpack is reported by the
+    // checks below instead of crashing on res1->IsGeneral().
+    // NOTE(review): presumably Unpack() returns NULL on failure - confirm.
+    gres1 = (res1 != NULL && res1->IsGeneral())
+            ? (search::docsummary::GeneralResult *) res1
+            : NULL;
+
+    RTR(__LINE__, _packer.Init(2));
+    RTR(__LINE__, _packer.AddLongData(strval, strlen(strval)));
+    RTR(__LINE__, _packer.AddLongString(datval, strlen(datval)));
+    RTR(__LINE__, _packer.GetDocsumBlob(&buf, &buflen));
+    res2 = _config.Unpack(0, 0, 0, buf, buflen);
+    // Same guard as for res1.
+    gres2 = (res2 != NULL && res2->IsGeneral())
+            ? (search::docsummary::GeneralResult *) res2
+            : NULL;
+
+    RTR(__LINE__, gres1 != NULL);
+    RTR(__LINE__, gres2 != NULL);
+
+    TestStringValue(__LINE__, gres1, "text", strval);
+    TestDataValue  (__LINE__, gres1, "data", datval);
+    TestFieldIndex (__LINE__, gres1, "text", 0);
+    TestFieldIndex (__LINE__, gres1, "data", 1);
+    RTR(__LINE__, (gres1 != NULL &&
+                   gres1->GetClass()->GetNumEntries() == 2));
+
+    TestStringValue(__LINE__, gres2, "text", strval);
+    TestDataValue  (__LINE__, gres2, "data", datval);
+    TestFieldIndex (__LINE__, gres2, "text", 0);
+    TestFieldIndex (__LINE__, gres2, "data", 1);
+    RTR(__LINE__, (gres2 != NULL &&
+                   gres2->GetClass()->GetNumEntries() == 2));
+
+    RTR(__LINE__, (gres1 != NULL &&
+                   gres1->GetClass()->GetClassID() == 1));
+    RTR(__LINE__, (gres2 != NULL &&
+                   gres2->GetClass()->GetClassID() == 2));
+
+    RTR(__LINE__, (gres1 != NULL && gres2 != NULL &&
+                   Equal(gres1, gres2)));
+
+    delete res1;
+    delete res2;
+}
+
+
+// Same scenario as TestBasic(), but the blob is unpacked with
+// GeneralResult::inplaceUnpack() on a result created here, after looking up
+// the result class from the class id stored in the blob.
+void
+MyApp::TestBasicInplace()
+{
+ const char *buf;
+ uint32_t buflen;
+
+ const search::docsummary::ResultClass *resClass;
+ search::docsummary::GeneralResult *gres;
+
+ uint32_t intval = 4;
+ uint16_t shortval = 2;
+ uint8_t byteval = 1;
+ float floatval = 4.5;
+ double doubleval = 8.75;
+ uint64_t int64val = 8;
+ const char *strval = "This is a string";
+ const char *datval = "This is data";
+ const char *lstrval = "This is a long string";
+ const char *ldatval = "This is long data";
+
+ RTR(__LINE__, _packer.Init(0));
+ RTR(__LINE__, _packer.AddInteger(intval));
+ RTR(__LINE__, _packer.AddShort(shortval));
+ RTR(__LINE__, _packer.AddByte(byteval));
+ RTR(__LINE__, _packer.AddFloat(floatval));
+ RTR(__LINE__, _packer.AddDouble(doubleval));
+ RTR(__LINE__, _packer.AddInt64(int64val));
+ RTR(__LINE__, _packer.AddString(strval, strlen(strval)));
+ RTR(__LINE__, _packer.AddData(datval, strlen(datval)));
+ RTR(__LINE__, _packer.AddLongString(lstrval, strlen(lstrval)));
+ RTR(__LINE__, _packer.AddLongData(ldatval, strlen(ldatval)));
+ RTR(__LINE__, _packer.GetDocsumBlob(&buf, &buflen));
+
+ // Resolve the class from the blob; gres stays NULL if the lookup or the
+ // in-place unpack fails, which the checks below then report as failures.
+ resClass = _config.LookupResultClass(_config.GetClassID(buf, buflen));
+ if (resClass == NULL) {
+ gres = NULL;
+ } else {
+ DocsumStoreValue value(buf, buflen);
+ gres = new search::docsummary::GeneralResult(resClass, 0, 0, 0);
+ if (!gres->inplaceUnpack(value)) {
+ delete gres;
+ gres = NULL;
+ }
+ }
+
+ RTR(__LINE__, gres != NULL);
+ TestIntValue (__LINE__, gres, "integer", 4);
+ TestIntValue (__LINE__, gres, "short", 2);
+ TestIntValue (__LINE__, gres, "byte", 1);
+ TestDoubleValue(__LINE__, gres, "float", floatval);
+ TestDoubleValue(__LINE__, gres, "double", doubleval);
+ TestInt64Value (__LINE__, gres, "int64", int64val);
+ TestStringValue(__LINE__, gres, "string", strval);
+ TestDataValue (__LINE__, gres, "data", datval);
+ TestStringValue(__LINE__, gres, "longstring", lstrval);
+ TestDataValue (__LINE__, gres, "longdata", ldatval);
+ RTR(__LINE__, (gres != NULL &&
+ gres->GetClass()->GetNumEntries() == 10));
+ RTR(__LINE__, (gres != NULL &&
+ gres->GetClass()->GetClassID() == 0));
+ // gres was allocated with new above (or is NULL); release it here.
+ delete gres;
+}
+
+
+// Same scenario as TestCompress(), but the blob is unpacked with
+// GeneralResult::inplaceUnpack() and the two field values are pulled out
+// through ResEntry::_extract_field() into RawBuf buffers before being
+// compared with the packed values.
+void
+MyApp::TestCompressInplace()
+{
+    const char *buf;
+    uint32_t    buflen;
+
+    search::RawBuf field1(32768);
+    search::RawBuf field2(32768);
+    const search::docsummary::ResultClass *resClass;
+    search::docsummary::GeneralResult     *gres;
+
+    const char *lstrval = "string string string";
+    const char *ldatval = "data data data";
+
+    RTR(__LINE__, _packer.Init(2));
+    RTR(__LINE__, _packer.AddLongString(lstrval, strlen(lstrval)));
+    RTR(__LINE__, _packer.AddLongData(ldatval, strlen(ldatval)));
+    RTR(__LINE__, _packer.GetDocsumBlob(&buf, &buflen));
+
+    // Resolve the class from the blob; gres stays NULL if the lookup or the
+    // in-place unpack fails, which the checks below then report as failures.
+    resClass = _config.LookupResultClass(_config.GetClassID(buf, buflen));
+    if (resClass == NULL) {
+        gres = NULL;
+    } else {
+        DocsumStoreValue value(buf, buflen);
+        gres = new search::docsummary::GeneralResult(resClass, 0, 0, 0);
+        if (!gres->inplaceUnpack(value)) {
+            delete gres;
+            gres = NULL;
+        }
+    }
+
+    search::docsummary::ResEntry *e1 = (gres == NULL) ? NULL : gres->GetEntry("text");
+    search::docsummary::ResEntry *e2 = (gres == NULL) ? NULL : gres->GetEntry("data");
+
+    if (e1 != NULL)
+        e1->_extract_field(&field1);
+    if (e2 != NULL)
+        e2->_extract_field(&field2);
+
+    RTR(__LINE__, gres != NULL);
+    RTR(__LINE__, e1 != NULL);
+    RTR(__LINE__, e2 != NULL);
+    // Compare only the bytes the buffer reports as used. Nothing visible here
+    // guarantees the extracted bytes are NUL-terminated, and the buffer is
+    // untouched when the entry lookup failed, so an unbounded strcmp() could
+    // read past the valid data.
+    RTR(__LINE__, (field1.GetUsedLen() == strlen(lstrval) &&
+                   memcmp(field1.GetDrainPos(), lstrval, strlen(lstrval)) == 0));
+    RTR(__LINE__, (field2.GetUsedLen() == strlen(ldatval) &&
+                   memcmp(field2.GetDrainPos(), ldatval, strlen(ldatval)) == 0));
+    RTR(__LINE__, strlen(lstrval) == field1.GetUsedLen());
+    RTR(__LINE__, strlen(ldatval) == field2.GetUsedLen());
+    RTR(__LINE__, (gres != NULL &&
+                   gres->GetClass()->GetNumEntries() == 2));
+    RTR(__LINE__, (gres != NULL &&
+                   gres->GetClass()->GetClassID() == 2));
+    // gres was allocated with new above (or is NULL); release it here.
+    delete gres;
+}
+
+
+
+// Application entry point: registers the three result classes used by the
+// tests, runs every test scenario in order and returns 0 when all checks
+// passed, 1 otherwise.
+int
+MyApp::Main()
+{
+ // Start from a passing verdict; ReportTestResult() clears it on failure.
+ _rc = true;
+ _cnt = 0;
+
+ search::docsummary::ResultClass *resClass;
+
+ // Class 0: one field of every type, in the order the tests add them.
+ resClass = _config.AddResultClass("c0", 0);
+ resClass->AddConfigEntry("integer", RES_INT);
+ resClass->AddConfigEntry("short", RES_SHORT);
+ resClass->AddConfigEntry("byte", RES_BYTE);
+ resClass->AddConfigEntry("float", RES_FLOAT);
+ resClass->AddConfigEntry("double", RES_DOUBLE);
+ resClass->AddConfigEntry("int64", RES_INT64);
+ resClass->AddConfigEntry("string", RES_STRING);
+ resClass->AddConfigEntry("data", RES_DATA);
+ resClass->AddConfigEntry("longstring", RES_LONG_STRING);
+ resClass->AddConfigEntry("longdata", RES_LONG_DATA);
+
+ // Class 1: "text"/"data" as string and data fields.
+ resClass = _config.AddResultClass("c1", 1);
+ resClass->AddConfigEntry("text", RES_STRING);
+ resClass->AddConfigEntry("data", RES_DATA);
+
+ // Class 2: the same field names as class 1, using the long variants.
+ resClass = _config.AddResultClass("c2", 2);
+ resClass->AddConfigEntry("text", RES_LONG_STRING);
+ resClass->AddConfigEntry("data", RES_LONG_DATA);
+
+ TestBasic();
+ TestFailLong();
+ TestFailShort();
+ TestFailOrder();
+ TestCompress();
+ TestCompat();
+ TestBasicInplace();
+ TestCompressInplace();
+
+ LOG(info, "CONCLUSION: %s", (_rc) ? "SUCCESS" : "FAIL");
+ return (_rc ? 0 : 1);
+}
+
+
+// Program entry point: creates the test application and hands the command
+// line to its Entry() method, whose result becomes the process exit code.
+int
+main(int argc, char **argv)
+{
+    MyApp app;
+    const int exitCode = app.Entry(argc, argv);
+    return exitCode;
+}
diff --git a/searchsummary/src/tests/docsumformat/docsum-parse.cpp b/searchsummary/src/tests/docsumformat/docsum-parse.cpp
new file mode 100644
index 00000000000..5fa7009464c
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/docsum-parse.cpp
@@ -0,0 +1,201 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("docsum-parse");
+#include <vespa/fnet/frt/frt.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+
+
+// needed to resolve external symbol from httpd.h on AIX
+void FastS_block_usr2() {} // intentionally empty: it only has to exist so the symbol resolves
+
+
+class MyApp : public FastOS_Application // driver for the docsum config parse tests
+{
+public:
+ bool Equal(search::docsummary::ResConfigEntry *a, search::docsummary::ResConfigEntry *b); // true if type and bind name match
+ bool Equal(search::docsummary::ResultClass *a, search::docsummary::ResultClass *b); // true if id, name and all entries match
+ bool Equal(search::docsummary::ResultConfig *a, search::docsummary::ResultConfig *b); // true if both hold equal classes in the same order
+ bool TestCorrect(const char *dirname, const char *filename); // positive test: the file must load and round-trip unchanged
+ bool TestIncorrect(const char *dirname, const char *filename); // negative test: loading the file must fail
+ int Main(); // scans 'parsetest'; returns 0 if every test passed, 1 otherwise
+};
+
+
+bool
+MyApp::Equal(search::docsummary::ResConfigEntry *a, search::docsummary::ResConfigEntry *b)
+{
+    // Entries with different types can never match; the names are only
+    // compared once the types agree.
+    if (a->_type != b->_type) {
+        return false;
+    }
+    const int nameDiff = strcmp(a->_bindname, b->_bindname);
+    return nameDiff == 0;
+}
+
+
+bool
+MyApp::Equal(search::docsummary::ResultClass *a, search::docsummary::ResultClass *b)
+{
+    // Compare the class headers first; any mismatch settles the answer.
+    if (a->GetNumEntries() != b->GetNumEntries()) {
+        return false;
+    }
+    if (a->GetClassID() != b->GetClassID()) {
+        return false;
+    }
+    if (strcmp(a->GetClassName(), b->GetClassName()) != 0) {
+        return false;
+    }
+
+    // The entry counts agree, so the entries can be compared pairwise by index.
+    uint32_t idx = 0;
+    while (idx < a->GetNumEntries()) {
+        if (!Equal(a->GetEntry(idx), b->GetEntry(idx))) {
+            return false;
+        }
+        ++idx;
+    }
+    return true;
+}
+
+
+bool
+MyApp::Equal(search::docsummary::ResultConfig *a, search::docsummary::ResultConfig *b)
+{
+    // Fetch the heads of both class lists up front.
+    search::docsummary::ResultClass *lhs = a->GetResultClasses();
+    search::docsummary::ResultClass *rhs = b->GetResultClasses();
+
+    // Different class counts mean the configs cannot be equal.
+    if (a->GetNumResultClasses() != b->GetNumResultClasses()) {
+        return false;
+    }
+
+    // Walk both lists in lockstep; the classes must match position by position.
+    for (; lhs != NULL && rhs != NULL;
+         lhs = lhs->GetNextClass(), rhs = rhs->GetNextClass())
+    {
+        if (!Equal(lhs, rhs)) {
+            return false;
+        }
+    }
+
+    // Both lists must be exhausted at the same time.
+    return (lhs == NULL) && (rhs == NULL);
+}
+
+
+// Formats "<dirname><path separator><prefix><filename>" into dst.
+// Returns true on success. Returns false, after logging an error, if
+// formatting fails or the result would not fit in dstLen bytes; dst is then
+// not a usable path.
+static bool
+BuildParseTestPath(char *dst, size_t dstLen, const char *dirname,
+                   const char *prefix, const char *filename)
+{
+    const int len = snprintf(dst, dstLen, "%s%s%s%s", dirname,
+                             FastOS_FileInterface::GetPathSeparator(),
+                             prefix, filename);
+    if (len < 0 || static_cast<size_t>(len) >= dstLen) {
+        LOG(error, "path too long for test file : %s%s", prefix, filename);
+        return false;
+    }
+    return true;
+}
+
+
+// Positive parse test for one input file.
+//
+// Reads config from <dirname>/<filename>, writes it to
+// <dirname>/out.<filename>, reads that file back, reads the reference
+// config from <dirname>/OK.<filename>, and requires all three to be equal.
+// Finally the reference config is passed through an RPC request
+// (GetConfig/SetConfig) and must come out equal as well.
+//
+// Returns true if every step succeeds; on the first failure an error is
+// logged and false is returned.
+bool
+MyApp::TestCorrect(const char *dirname, const char *filename)
+{
+    char str1[512]; // test input file
+    char str2[512]; // test output file
+    char str3[512]; // summary.cf verification file
+
+    search::docsummary::ResultConfig a;
+    search::docsummary::ResultConfig b;
+    search::docsummary::ResultConfig c;
+    search::docsummary::ResultConfig d;
+
+    // Bounded formatting: an over-long name fails the test instead of
+    // overrunning the fixed-size buffers.
+    if (!BuildParseTestPath(str1, sizeof(str1), dirname, "", filename) ||
+        !BuildParseTestPath(str2, sizeof(str2), dirname, "out.", filename) ||
+        !BuildParseTestPath(str3, sizeof(str3), dirname, "OK.", filename))
+    {
+        return false;
+    }
+
+    if (!a.ReadConfig(str1)) {
+        LOG(error, "could not read config from : %s", str1);
+        return false;
+    }
+
+    if (!a.WriteConfig(str2)) {
+        LOG(error, "could not write config to : %s", str2);
+        return false;
+    }
+
+    if (!b.ReadConfig(str2)) {
+        LOG(error, "could not read config from : %s", str2);
+        return false;
+    }
+
+    if (!c.ReadConfig(str3)) {
+        LOG(error, "could not read config from : %s", str3);
+        return false;
+    }
+
+    if (!Equal(&a, &b)) {
+        LOG(error, "%s and %s does not contain the same config", str1, str2);
+        return false;
+    }
+
+    if (!Equal(&a, &c)) {
+        LOG(error, "%s and %s does not contain the same config", str1, str3);
+        return false;
+    }
+
+    if (!Equal(&b, &c)) {
+        LOG(error, "%s and %s does not contain the same config", str2, str3);
+        return false;
+    }
+
+    // Round-trip the reference config through an RPC request. The request
+    // is released exactly once, whatever the outcome of the comparison.
+    FRT_RPCRequest *req = new FRT_RPCRequest();
+    c.GetConfig(req);
+    d.SetConfig(req);
+    const bool rpcRoundTripOk = Equal(&c, &d);
+    req->SubRef();
+    if (!rpcRoundTripOk) {
+        LOG(error, "RPC get/set failed (%s)", str3);
+        return false;
+    }
+
+    return true;
+}
+
+
+// Negative parse test for one input file.
+//
+// Tries to read config from <dirname>/<filename>. Returns true if the read
+// is rejected. Returns false, after logging an error, if the file
+// unexpectedly loads or if its path does not fit in the local buffer.
+bool
+MyApp::TestIncorrect(const char *dirname, const char *filename)
+{
+    char str[512];
+
+    // Bounded formatting: an over-long name fails the test instead of
+    // overrunning the fixed-size buffer.
+    const int len = snprintf(str, sizeof(str), "%s%s%s", dirname,
+                             FastOS_FileInterface::GetPathSeparator(), filename);
+    if (len < 0 || static_cast<size_t>(len) >= sizeof(str)) {
+        LOG(error, "path too long for test file : %s", filename);
+        return false;
+    }
+
+    search::docsummary::ResultConfig resConfig;
+
+    if (resConfig.ReadConfig(str)) {
+        LOG(error, "'%s' did not give parse error", str);
+        return false;
+    }
+    return true;
+}
+
+
+int
+MyApp::Main()
+{
+    int failures = 0;
+
+    FastOS_DirectoryScan dirScan("parsetest");
+    LOG(info, "looking for input files in 'parsetest'...");
+    while (dirScan.ReadNext()) {
+        // Classify the entry by its name prefix; anything else is ignored.
+        const bool positive = (strncmp(dirScan.GetName(), "correct.", 8) == 0);
+        const bool negative = !positive
+            && (strncmp(dirScan.GetName(), "incorrect.", 10) == 0);
+
+        if (positive) {
+            const bool passed = TestCorrect("parsetest", dirScan.GetName());
+            if (passed) {
+                LOG(info, "'%s' : positive test PASSED", dirScan.GetName());
+            } else {
+                LOG(error, "'%s' : positive test FAILED", dirScan.GetName());
+                ++failures;
+            }
+        } else if (negative) {
+            const bool passed = TestIncorrect("parsetest", dirScan.GetName());
+            if (passed) {
+                LOG(info, "'%s' : negative test PASSED", dirScan.GetName());
+            } else {
+                LOG(error, "'%s' : negative test FAILED", dirScan.GetName());
+                ++failures;
+            }
+        }
+    }
+
+    // Exit code 0 only when no test failed.
+    if (failures != 0) {
+        return 1;
+    }
+    return 0;
+}
+
+
+int
+main(int argc, char **argv)
+{
+    // Delegate to MyApp::Entry; its result becomes the process exit status.
+    MyApp app;
+    const int status = app.Entry(argc, argv);
+    return status;
+}
diff --git a/searchsummary/src/tests/docsumformat/dotest.sh b/searchsummary/src/tests/docsumformat/dotest.sh
new file mode 100755
index 00000000000..64097b0061d
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/dotest.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Run the pack test with stdout and stderr captured in packtest.out, and
+# report the outcome based on its exit status.
+echo "running pack test..."
+if ./docsum-pack > packtest.out 2>&1; then
+    echo "pack test PASSED"
+else
+    echo "pack test FAILED!"
+    echo "please check packtest.out"
+    exit 1
+fi
diff --git a/searchsummary/src/tests/docsumformat/parsetest/.gitignore b/searchsummary/src/tests/docsumformat/parsetest/.gitignore
new file mode 100644
index 00000000000..19815d313ff
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/.gitignore
@@ -0,0 +1,2 @@
+*.out
+out.*
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.1 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.1
new file mode 100644
index 00000000000..8238b53f81c
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.1
@@ -0,0 +1,17 @@
+idtype none
+
+class default id 0
+field URL type string
+field TITLE type string
+field TEASER type string
+field DSHOST type integer
+field DSKEY type integer
+field BYTES type integer
+field WORDS type integer
+field MODDATE type integer
+field CRAWLDATE type integer
+field LANG1 type byte
+field LANG2 type byte
+field LANG3 type byte
+field LANG4 type byte
+field CHARSET type integer
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.2 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.2
new file mode 100644
index 00000000000..8996c2dac4c
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.2
@@ -0,0 +1,14 @@
+idtype byte
+
+class document id 1
+field title type string
+field teaser type string
+field url type string
+field date type integer
+
+class image id 2
+field title type string
+field date type integer
+field width type short
+field height type short
+field bitmaps type byte
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.3 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.3
new file mode 100644
index 00000000000..ae29fa40335
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.3
@@ -0,0 +1,5 @@
+idtype none
+
+class default id 0
+field TITLE type string
+field DATE type integer
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.4 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.4
new file mode 100644
index 00000000000..8238b53f81c
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.4
@@ -0,0 +1,17 @@
+idtype none
+
+class default id 0
+field URL type string
+field TITLE type string
+field TEASER type string
+field DSHOST type integer
+field DSKEY type integer
+field BYTES type integer
+field WORDS type integer
+field MODDATE type integer
+field CRAWLDATE type integer
+field LANG1 type byte
+field LANG2 type byte
+field LANG3 type byte
+field LANG4 type byte
+field CHARSET type integer
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.5 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.5
new file mode 100644
index 00000000000..6b6dc874a68
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.5
@@ -0,0 +1,3 @@
+idtype byte
+
+class myclass id 42
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.6 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.6
new file mode 100644
index 00000000000..38416fdf45e
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.6
@@ -0,0 +1,5 @@
+idtype none
+
+class default id 0
+field TEASER type longstring
+field DOCTEXT type longdata
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.7 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.7
new file mode 100644
index 00000000000..d1f17d25141
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.7
@@ -0,0 +1,11 @@
+idtype short
+
+class class_1 id 1
+field title type string
+field rawteaser type data
+field doctext type longdata
+field dynteaser type longstring
+
+class class_2 id 2
+field title type string
+field rawteaser type longdata
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.8 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.8
new file mode 100644
index 00000000000..e929b872a05
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.8
@@ -0,0 +1,7 @@
+idtype integer
+
+class class_50 id 50
+field title type data
+
+class class_100 id 100
+field title type string
diff --git a/searchsummary/src/tests/docsumformat/parsetest/OK.correct.9 b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.9
new file mode 100644
index 00000000000..668505be77d
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/OK.correct.9
@@ -0,0 +1,13 @@
+idtype none
+
+class default id 0
+field f0 type integer
+field f1 type short
+field f2 type byte
+field f3 type float
+field f4 type double
+field f5 type int64
+field f6 type string
+field f7 type data
+field f8 type longstring
+field f9 type longdata
diff --git a/searchsummary/src/tests/docsumformat/parsetest/README b/searchsummary/src/tests/docsumformat/parsetest/README
new file mode 100644
index 00000000000..2de83e1b0cb
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/README
@@ -0,0 +1,24 @@
+The files in this directory are used to test the parsing of document
+summary format config files. The files are named by the following
+rules:
+
+incorrect.* : these files are incorrect; loading them should fail.
+
+correct.* : these files are correct; loading them should succeed.
+
+OK.correct.* : these files contain normalized config in 'summary.cf'
+ format that matches the config contained in
+ the corresponding 'correct.*' files.
+
+The 'docsum-parse' program loops through all files in this
+directory. For each file that has a name beginning with 'incorrect.',
+it checks that loading document summary format config from it
+fails. For each file that has a name beginning with 'correct.', it
+checks that document summary format config may be read from the
+file. It then writes the config back to a file named 'out.correct.<>',
+reads the newly generated file back in, reads the corresponding
+'OK.correct.<>' file and checks that all 3 configs are exactly the
+same.
+
+New tests may be added simply by adding files conforming to the above
+rules to this directory.
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.1 b/searchsummary/src/tests/docsumformat/parsetest/correct.1
new file mode 100644
index 00000000000..0b3d57b7f9c
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.1
@@ -0,0 +1,14 @@
+STRING URL
+STRING TITLE
+STRING TEASER
+INT DSHOST
+INT DSKEY
+INT BYTES
+INT WORDS
+INT MODDATE
+INT CRAWLDATE
+BYTE LANG1
+BYTE LANG2
+BYTE LANG3
+BYTE LANG4
+INT CHARSET
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.2 b/searchsummary/src/tests/docsumformat/parsetest/correct.2
new file mode 100644
index 00000000000..8996c2dac4c
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.2
@@ -0,0 +1,14 @@
+idtype byte
+
+class document id 1
+field title type string
+field teaser type string
+field url type string
+field date type integer
+
+class image id 2
+field title type string
+field date type integer
+field width type short
+field height type short
+field bitmaps type byte
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.3 b/searchsummary/src/tests/docsumformat/parsetest/correct.3
new file mode 100644
index 00000000000..8a16e3f3fd1
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.3
@@ -0,0 +1,3 @@
+idtype byte
+STRING TITLE
+INT DATE
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.4 b/searchsummary/src/tests/docsumformat/parsetest/correct.4
new file mode 100644
index 00000000000..8238b53f81c
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.4
@@ -0,0 +1,17 @@
+idtype none
+
+class default id 0
+field URL type string
+field TITLE type string
+field TEASER type string
+field DSHOST type integer
+field DSKEY type integer
+field BYTES type integer
+field WORDS type integer
+field MODDATE type integer
+field CRAWLDATE type integer
+field LANG1 type byte
+field LANG2 type byte
+field LANG3 type byte
+field LANG4 type byte
+field CHARSET type integer
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.5 b/searchsummary/src/tests/docsumformat/parsetest/correct.5
new file mode 100644
index 00000000000..d179537e208
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.5
@@ -0,0 +1,4 @@
+idtype byte
+class myclass id 42
+STRING TITLE
+INT DATE
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.6 b/searchsummary/src/tests/docsumformat/parsetest/correct.6
new file mode 100644
index 00000000000..a4e41ec72d8
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.6
@@ -0,0 +1,2 @@
+LONGSTRING TEASER
+LONGDATA DOCTEXT
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.7 b/searchsummary/src/tests/docsumformat/parsetest/correct.7
new file mode 100644
index 00000000000..d1f17d25141
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.7
@@ -0,0 +1,11 @@
+idtype short
+
+class class_1 id 1
+field title type string
+field rawteaser type data
+field doctext type longdata
+field dynteaser type longstring
+
+class class_2 id 2
+field title type string
+field rawteaser type longdata
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.8 b/searchsummary/src/tests/docsumformat/parsetest/correct.8
new file mode 100644
index 00000000000..e929b872a05
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.8
@@ -0,0 +1,7 @@
+idtype integer
+
+class class_50 id 50
+field title type data
+
+class class_100 id 100
+field title type string
diff --git a/searchsummary/src/tests/docsumformat/parsetest/correct.9 b/searchsummary/src/tests/docsumformat/parsetest/correct.9
new file mode 100644
index 00000000000..668505be77d
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/correct.9
@@ -0,0 +1,13 @@
+idtype none
+
+class default id 0
+field f0 type integer
+field f1 type short
+field f2 type byte
+field f3 type float
+field f4 type double
+field f5 type int64
+field f6 type string
+field f7 type data
+field f8 type longstring
+field f9 type longdata
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.1 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.1
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.1
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.2 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.2
new file mode 100644
index 00000000000..600380f898d
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.2
@@ -0,0 +1,14 @@
+idtype int
+
+class document id 1
+field title type string
+field teaser type string
+field url type string
+field date type integer
+
+class image id 2
+field title type string
+field date type integer
+field width type short
+field height type short
+field bitmaps type byte
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.3 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.3
new file mode 100644
index 00000000000..35d46b73f96
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.3
@@ -0,0 +1,14 @@
+idtype byte
+
+class document id 1
+field title type string
+field teaser type string
+field url type string
+field date type integer
+
+class image id 1
+field title type string
+field date type integer
+field width type short
+field height type short
+field bitmaps type byte
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.4 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.4
new file mode 100644
index 00000000000..f50c143b4be
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.4
@@ -0,0 +1,14 @@
+idtype byte
+
+class document id 1
+field title type string
+field teaser type string
+field url type string
+field date type int
+
+class image id 2
+field title type string
+field date type integer
+field width type short
+field height type short
+field bitmaps type byte
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.5 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.5
new file mode 100644
index 00000000000..6579c30a29d
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.5
@@ -0,0 +1,14 @@
+idtype byte
+
+class document id 1
+field title type string
+field teaser type string
+field url type string
+field url type integer
+
+class image id 2
+field title type string
+field date type integer
+field width type short
+field height type short
+field bitmaps type byte
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.6 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.6
new file mode 100644
index 00000000000..2ce1ab9507e
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.6
@@ -0,0 +1,13 @@
+idtype byte
+
+field title type string
+field teaser type string
+field url type string
+field date type integer
+
+class image id 2
+field title type string
+field date type integer
+field width type short
+field height type short
+field bitmaps type byte
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.7 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.7
new file mode 100644
index 00000000000..e51bb1d2d48
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.7
@@ -0,0 +1,14 @@
+STRING URL
+STRING TITLE
+STRING TITLE
+INT DSHOST
+INT DSKEY
+INT BYTES
+INT WORDS
+INT MODDATE
+INT CRAWLDATE
+BYTE LANG1
+BYTE LANG2
+BYTE LANG3
+BYTE LANG4
+INT CHARSET
diff --git a/searchsummary/src/tests/docsumformat/parsetest/incorrect.8 b/searchsummary/src/tests/docsumformat/parsetest/incorrect.8
new file mode 100644
index 00000000000..7639557b734
--- /dev/null
+++ b/searchsummary/src/tests/docsumformat/parsetest/incorrect.8
@@ -0,0 +1,2 @@
+idtype byte
+STRING TITLE
diff --git a/searchsummary/src/tests/docsummary/.gitignore b/searchsummary/src/tests/docsummary/.gitignore
new file mode 100644
index 00000000000..3f0be20ca74
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchsummary_positionsdfw_test_app
diff --git a/searchsummary/src/tests/docsummary/CMakeLists.txt b/searchsummary/src/tests/docsummary/CMakeLists.txt
new file mode 100644
index 00000000000..7eaa78d923e
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchsummary_positionsdfw_test_app
+ SOURCES
+ positionsdfw_test.cpp
+ DEPENDS
+ searchsummary
+)
+vespa_add_test(NAME searchsummary_positionsdfw_test_app COMMAND searchsummary_positionsdfw_test_app)
diff --git a/searchsummary/src/tests/docsummary/positionsdfw_test.cpp b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp
new file mode 100644
index 00000000000..59f91e12ef7
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp
@@ -0,0 +1,142 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for positionsdfw.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("positionsdfw_test");
+
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/searchsummary/docsummary/positionsdfw.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using search::RawBuf;
+using search::IAttributeManager;
+using search::SingleInt64ExtAttribute;
+using search::attribute::IAttributeContext;
+using search::attribute::IAttributeVector;
+using vespalib::string;
+using std::vector;
+
+namespace search {
+namespace docsummary {
+
+namespace {
+
+class Test : public vespalib::TestApp { // test application for the positions docsum field writer
+ void requireThat2DPositionFieldIsWritten(); // the single test case, invoked from Main()
+
+public:
+ int Main(); // test entry point
+};
+
+int
+Test::Main() // runs the one test case between the testkit init and done macros
+{
+ TEST_INIT("positionsdfw_test");
+
+ TEST_DO(requireThat2DPositionFieldIsWritten());
+
+ TEST_DONE(); // NOTE(review): presumably supplies the return value, since no explicit return follows - confirm
+}
+
+struct MyEnvironment : IDocsumEnvironment { // minimal docsum environment stub
+ IAttributeManager *attribute_man; // handed out unchanged by getAttributeManager(); starts as null
+
+ MyEnvironment() : attribute_man(0) {}
+
+ virtual IAttributeManager *getAttributeManager() { return attribute_man; }
+ virtual string lookupIndex(const string &s) const { return s; } // identity mapping: returns its argument
+ virtual juniper::Juniper *getJuniper() { return 0; } // always returns a null pointer
+};
+
+class MyAttributeContext : public IAttributeContext { // attribute context that serves one fixed attribute
+ const IAttributeVector &_attr; // the attribute returned for every lookup; not owned
+public:
+ MyAttributeContext(const IAttributeVector &attr) : _attr(attr) {}
+ virtual const IAttributeVector *getAttribute(const string &) const { // the requested name is ignored
+ return &_attr;
+ }
+ virtual const IAttributeVector *getAttributeStableEnum(
+ const string &) const { abort(); } // aborts the process if called
+ virtual void getAttributeList(vector<const IAttributeVector *> &) const
+ { abort(); } // aborts the process if called
+};
+
+class MyAttributeManager : public IAttributeManager { // attribute manager whose only working operation is createContext()
+ const IAttributeVector &_attr; // attribute passed on to every created context; not owned
+public:
+
+ MyAttributeManager(const IAttributeVector &attr) : _attr(attr) {}
+ virtual AttributeGuard::UP getAttribute(const string &) const { // aborts the process if called
+ abort();
+ }
+ virtual AttributeGuard::UP getAttributeStableEnum(const string &) const { // aborts the process if called
+ abort();
+ }
+ virtual void getAttributeList(vector<AttributeGuard> &) const { // aborts the process if called
+ abort();
+ }
+ virtual IAttributeContext::UP createContext() const { // returns a new MyAttributeContext bound to _attr
+ return IAttributeContext::UP(new MyAttributeContext(_attr));
+ }
+};
+
+struct MyGetDocsumsStateCallback : GetDocsumsStateCallback { // callback whose three hooks all do nothing
+ virtual void FillSummaryFeatures(GetDocsumsState *, IDocsumEnvironment *) {} // no-op
+ virtual void FillRankFeatures(GetDocsumsState *, IDocsumEnvironment *) {} // no-op
+ virtual void ParseLocation(GetDocsumsState *) {} // no-op
+};
+
+template <typename AttrType>
+void checkWritePositionField(Test &test, AttrType &attr,
+ uint32_t doc_id, const string &expected) { // fills attr, writes the position field for doc_id and compares the output with expected
+ for (AttributeVector::DocId i = 0; i < doc_id + 1; ) { // no increment here - NOTE(review): presumably addDoc() advances i through a reference parameter; confirm
+ attr.addDoc(i);
+ if (i == 007) { // octal literal, i.e. doc id 7
+ attr.add((int64_t) -1);
+ } else if (i == 0x42) { // hexadecimal, i.e. doc id 66
+ attr.add(0xAAAAaaaaAAAAaaaa);
+ } else if (i == 0x17) { // hexadecimal, i.e. doc id 23
+ attr.add(0x5555aaaa5555aaab);
+ } else if (i == 42) { // decimal 42, distinct from 0x42 above
+ attr.add(0x8000000000000000);
+ } else {
+ attr.add(i); // value = docid
+ }
+ }
+
+ MyAttributeManager attribute_man(attr);
+ PositionsDFW::UP writer =
+ createPositionsDFW(attr.getName().c_str(), &attribute_man);
+ ASSERT_TRUE(writer.get()); // a writer must have been created before it is used below
+ ResType res_type = RES_LONG_STRING;
+ RawBuf target(1024);
+ MyGetDocsumsStateCallback callback;
+ GetDocsumsState state(callback);
+ state._attributes.push_back(&attr);
+
+ writer->WriteField(doc_id, 0, &state, res_type, &target);
+
+ test.EXPECT_EQUAL(expected.size(), *(const uint32_t *)(target.GetDrainPos())); // the first 4 bytes of the output are read as the length
+ const char *p = target.GetDrainPos() + 4; // the text is taken to start right after those 4 bytes
+ test.EXPECT_EQUAL(expected, string(p, p + expected.size()));
+}
+
+void Test::requireThat2DPositionFieldIsWritten() {
+    // Document ids paired with the text each is expected to produce. The
+    // cases run in this order against one shared attribute, which
+    // checkWritePositionField() adds documents to on every call.
+    static const struct {
+        uint32_t    doc_id;
+        const char *expected;
+    } cases[] = {
+        { 0x3e, "<position x=\"6\" y=\"7\" latlong=\"N0.000007;E0.000006\" />" },
+        { 007,  "<position x=\"-1\" y=\"-1\" latlong=\"S0.000001;W0.000001\" />" },
+        { 0x42, "<position x=\"0\" y=\"-1\" latlong=\"S0.000001;E0.000000\" />" },
+        { 0x17, "<position x=\"-16711935\" y=\"16711935\" latlong=\"N16.711935;W16.711935\" />" },
+        { 42,   "" }
+    };
+
+    SingleInt64ExtAttribute attr("foo");
+    for (unsigned int idx = 0; idx < sizeof(cases) / sizeof(cases[0]); ++idx) {
+        checkWritePositionField(*this, attr, cases[idx].doc_id, cases[idx].expected);
+    }
+}
+
+} // namespace
+} // namespace docsummary
+} // namespace search
+
+TEST_APPHOOK(search::docsummary::Test);
diff --git a/searchsummary/src/tests/docsummary/slime_summary/.gitignore b/searchsummary/src/tests/docsummary/slime_summary/.gitignore
new file mode 100644
index 00000000000..1df864db333
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/slime_summary/.gitignore
@@ -0,0 +1 @@
+searchsummary_slime_summary_test_app
diff --git a/searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt b/searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt
new file mode 100644
index 00000000000..a2bd3bbc610
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchsummary_slime_summary_test_app
+ SOURCES
+ slime_summary_test.cpp
+ DEPENDS
+ searchsummary
+)
+vespa_add_test(NAME searchsummary_slime_summary_test_app COMMAND searchsummary_slime_summary_test_app)
diff --git a/searchsummary/src/tests/docsummary/slime_summary/FILES b/searchsummary/src/tests/docsummary/slime_summary/FILES
new file mode 100644
index 00000000000..6d3a81d3ffc
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/slime_summary/FILES
@@ -0,0 +1 @@
+slime_summary_test.cpp
diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp
new file mode 100644
index 00000000000..6509491d0ac
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp
@@ -0,0 +1,125 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchsummary/docsummary/docsumwriter.h>
+#include <vespa/searchsummary/docsummary/resultpacker.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/data/slime/simple_buffer.h>
+#include <vespa/vespalib/data/slime/json_format.h>
+#include <vespa/vespalib/data/slime/binary_format.h>
+#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
+
+using namespace vespalib::slime::convenience;
+using namespace search::docsummary;
+
+namespace {
+
+struct FieldBlock { // holds a slime value decoded from JSON together with its binary encoding
+ Slime slime; // value decoded from the JSON input
+ search::RawBuf binary; // binary-format encoding of slime
+
+ explicit FieldBlock(const vespalib::string &jsonInput)
+ : slime(), binary(1024)
+ {
+ size_t used = vespalib::slime::JsonFormat::decode(jsonInput, slime);
+ EXPECT_EQUAL(jsonInput.size(), used); // the decoder must consume the entire input
+ search::SlimeOutputRawBufAdapter adapter(binary);
+ vespalib::slime::BinaryFormat::encode(slime, adapter); // fills binary through the adapter
+ }
+ const char *data() const { return binary.GetDrainPos(); } // start of the encoded bytes
+ size_t dataLen() const { return binary.GetUsedLen(); } // number of encoded bytes
+};
+
+struct DocsumFixture : IDocsumStore, GetDocsumsStateCallback { // fixture acting as both the docsum store and the state callback
+ std::unique_ptr<DynamicDocsumWriter> writer; // created in the constructor from the config built there
+ std::unique_ptr<ResultPacker> packer; // packs the stored docsum; uses the writer's result config
+ GetDocsumsState state; // constructed with this fixture as its callback
+ DocsumFixture() : writer(), packer(), state(*this) {
+ ResultConfig *config = new ResultConfig(); // NOTE(review): never deleted here - presumably the writer takes ownership; confirm
+ ResultClass *cfg = config->AddResultClass("default", 0);
+ EXPECT_TRUE(cfg != 0);
+ EXPECT_TRUE(cfg->AddConfigEntry("int_field", RES_INT)); // entry order here must match the packing order in getMappedDocsum()
+ EXPECT_TRUE(cfg->AddConfigEntry("short_field", RES_SHORT));
+ EXPECT_TRUE(cfg->AddConfigEntry("byte_field", RES_BYTE));
+ EXPECT_TRUE(cfg->AddConfigEntry("float_field", RES_FLOAT));
+ EXPECT_TRUE(cfg->AddConfigEntry("double_field", RES_DOUBLE));
+ EXPECT_TRUE(cfg->AddConfigEntry("int64_field", RES_INT64));
+ EXPECT_TRUE(cfg->AddConfigEntry("string_field", RES_STRING));
+ EXPECT_TRUE(cfg->AddConfigEntry("data_field", RES_DATA));
+ EXPECT_TRUE(cfg->AddConfigEntry("longstring_field", RES_LONG_STRING));
+ EXPECT_TRUE(cfg->AddConfigEntry("longdata_field", RES_LONG_DATA));
+ EXPECT_TRUE(cfg->AddConfigEntry("xmlstring_field", RES_XMLSTRING));
+ EXPECT_TRUE(cfg->AddConfigEntry("jsonstring_field", RES_JSONSTRING));
+ EXPECT_TRUE(cfg->AddConfigEntry("bad_jsonstring_field", RES_JSONSTRING));
+ config->CreateEnumMaps();
+ writer.reset(new DynamicDocsumWriter(config, 0));
+ packer.reset(new ResultPacker(writer->GetResultConfig()));
+ state._args.setFlags(search::fs4transport::GDFLAG_ALLOW_SLIME); // request slime output from the writer
+ }
+ void getDocsum(Slime &slime) { // writes the docsum for docid 1 and decodes it into slime
+ uint32_t classId;
+ search::RawBuf buf(4096);
+ writer->WriteDocsum(1u, &state, this, &buf);
+ ASSERT_GREATER(buf.GetUsedLen(), sizeof(classId)); // output must hold more than just the leading id
+ memcpy(&classId, buf.GetDrainPos(), sizeof(classId)); // copy out the leading 32-bit id
+ buf.Drain(sizeof(classId)); // skip past the id so only the payload remains
+ EXPECT_EQUAL(classId, ::search::fs4transport::SLIME_MAGIC_ID);
+ EXPECT_GREATER(vespalib::slime::BinaryFormat
+ ::decode(Memory(buf.GetDrainPos(), buf.GetUsedLen()), slime), 0u); // decode must report a non-zero result
+ }
+ virtual uint32_t getNumDocs() { return 2; }
+ virtual DocsumStoreValue getMappedDocsum(uint32_t docid, bool useSlimeInsideFields) { // builds the stored docsum blob; only docid 1 with slime inside fields is expected
+ EXPECT_EQUAL(true, useSlimeInsideFields);
+ EXPECT_EQUAL(1u, docid);
+ EXPECT_TRUE(packer->Init(0)); // start packing for result class id 0
+ EXPECT_TRUE(packer->AddInteger(4));
+ EXPECT_TRUE(packer->AddShort(2));
+ EXPECT_TRUE(packer->AddByte(1));
+ EXPECT_TRUE(packer->AddFloat(4.5));
+ EXPECT_TRUE(packer->AddDouble(8.75));
+ EXPECT_TRUE(packer->AddInt64(8));
+ EXPECT_TRUE(packer->AddString( "string",
+ strlen("string")));
+ EXPECT_TRUE(packer->AddData( "data",
+ strlen("data")));
+ EXPECT_TRUE(packer->AddLongString( "long_string",
+ strlen("long_string")));
+ EXPECT_TRUE(packer->AddLongData( "long_data",
+ strlen("long_data")));
+ EXPECT_TRUE(packer->AddLongString( "xml_string",
+ strlen("xml_string"))); // value for xmlstring_field
+ FieldBlock jsf1("{foo:1, bar:2}");
+ EXPECT_TRUE(packer->AddLongData(jsf1.data(), jsf1.dataLen())); // jsonstring_field: binary-encoded slime
+ EXPECT_TRUE(packer->AddLongString("abc", 3)); // bad_jsonstring_field: plain text instead of encoded slime
+ const char *buf;
+ uint32_t len;
+ EXPECT_TRUE(packer->GetDocsumBlob(&buf, &len));
+ return DocsumStoreValue(buf, len);
+ }
+ uint32_t getSummaryClassId() const override { return 0; }
+ virtual void FillSummaryFeatures(GetDocsumsState *, IDocsumEnvironment *) {} // no-op
+ virtual void FillRankFeatures(GetDocsumsState *, IDocsumEnvironment *) {} // no-op
+ virtual void ParseLocation(GetDocsumsState *) {} // no-op
+};
+
+} // namespace <unnamed>
+
+TEST_FF("require that docsum can be written as slime", DocsumFixture(), Slime()) { // f1 is the DocsumFixture, f2 the Slime that receives the result
+ f1.getDocsum(f2);
+ EXPECT_EQUAL(f2.get()["int_field"].asLong(), 4u); // the values below mirror what getMappedDocsum() packed
+ EXPECT_EQUAL(f2.get()["short_field"].asLong(), 2u);
+ EXPECT_EQUAL(f2.get()["byte_field"].asLong(), 1u);
+ EXPECT_EQUAL(f2.get()["float_field"].asDouble(), 4.5);
+ EXPECT_EQUAL(f2.get()["double_field"].asDouble(), 8.75);
+ EXPECT_EQUAL(f2.get()["int64_field"].asLong(), 8u);
+ EXPECT_EQUAL(f2.get()["string_field"].asString().make_string(), std::string("string"));
+ EXPECT_EQUAL(f2.get()["data_field"].asData().make_string(), std::string("data"));
+ EXPECT_EQUAL(f2.get()["longstring_field"].asString().make_string(), std::string("long_string"));
+ EXPECT_EQUAL(f2.get()["longdata_field"].asData().make_string(), std::string("long_data"));
+ EXPECT_EQUAL(f2.get()["xmlstring_field"].asString().make_string(), std::string("xml_string"));
+ EXPECT_EQUAL(f2.get()["jsonstring_field"]["foo"].asLong(), 1u); // the encoded slime field shows up as a nested object
+ EXPECT_EQUAL(f2.get()["jsonstring_field"]["bar"].asLong(), 2u);
+ EXPECT_EQUAL(f2.get()["bad_jsonstring_field"].type().getId(), 0u); // NOTE(review): type id 0 presumably means an empty value - confirm
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchsummary/src/tests/extractkeywords/.gitignore b/searchsummary/src/tests/extractkeywords/.gitignore
new file mode 100644
index 00000000000..1b50b24b284
--- /dev/null
+++ b/searchsummary/src/tests/extractkeywords/.gitignore
@@ -0,0 +1,7 @@
+*.core
+.depend
+Makefile
+core
+core.*
+extractkeywordstest
+searchsummary_extractkeywordstest_app
diff --git a/searchsummary/src/tests/extractkeywords/CMakeLists.txt b/searchsummary/src/tests/extractkeywords/CMakeLists.txt
new file mode 100644
index 00000000000..d726ffe794c
--- /dev/null
+++ b/searchsummary/src/tests/extractkeywords/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ # Test executable built from extractkeywordstest.cpp against the searchsummary library.
+ vespa_add_executable(searchsummary_extractkeywordstest_app
+ SOURCES
+ extractkeywordstest.cpp
+ DEPENDS
+ searchsummary
+ )
+ # The test is driven through runtests.sh rather than by invoking the binary directly.
+ vespa_add_test(NAME searchsummary_extractkeywordstest_app COMMAND sh runtests.sh)
diff --git a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp
new file mode 100644
index 00000000000..59d949f40ca
--- /dev/null
+++ b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp
@@ -0,0 +1,295 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchsummary/docsummary/keywordextractor.h>
+#include <vespa/searchlib/parsequery/simplequerystack.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include "extractkeywordstest.h"
+
+#define NUMTESTS 5
+
+ /**
+  * Parses a run of decimal digits starting at p and advances p past them.
+  * Parsing stops at end of string or at the 'stop' character.
+  *
+  * @param p     cursor into the argument; left at the terminating character on success
+  * @param stop  extra terminator besides '\0' (pass '\0' for none)
+  * @param value receives the parsed number; 0 when no digits are present
+  * @return false if a character that is neither a digit nor a terminator is found
+  */
+ static bool
+ parseTestNumber(char *&p, char stop, int &value)
+ {
+     value = 0;
+     for (; *p != '\0' && *p != stop; ++p) {
+         if (*p < '0' || *p > '9')
+             return false;
+         // Stop accumulating once the value is already huge; this avoids
+         // signed overflow while keeping the value "out of range".
+         if (value < 100000000)
+             value = value * 10 + (*p - '0');
+     }
+     return true;
+ }
+
+ /**
+  * Application entry point.
+  *
+  * Each command line argument is one of:
+  *   num      run a single test
+  *   num-num  run an inclusive range; either end may be left out
+  *   /spec    exclude the test(s) given by spec instead of including them
+  *   *N       run the selected tests N times (disables result verification when N > 1)
+  *
+  * Malformed arguments are reported and ignored instead of being turned into
+  * garbage (possibly negative) test indexes.
+  *
+  * @return 0 if all executed tests passed, 1 otherwise
+  */
+ int
+ ExtractKeywordsTest::Main()
+ {
+     int doTest[NUMTESTS];
+     bool verify = false;
+     int multiplier = 1;
+     bool failed = false;
+
+     if (_argc == 1)
+         Usage(_argv[0]);
+
+     // default initialize to not run any tests.
+     for (int n = 0; n < NUMTESTS; n++)
+         doTest[n] = 0;
+
+     // parse the command line arguments
+     for (int i = 1; i < _argc; i++) {
+         char *p = _argv[i];
+         int low = 0;
+         int high = NUMTESTS - 1;
+         int accnum = 0;
+
+         // Check if a multiplier is specified
+         if (*p == '*') {
+             p++;
+             if (!parseTestNumber(p, '\0', accnum)) {
+                 printf("Ignoring malformed multiplier '%s'\n", _argv[i]);
+                 continue;
+             }
+             multiplier = accnum;
+             continue;
+         }
+
+         // Default is to run the tests specified, unless the first char is '/'
+         int indicator = 1;
+         if (*p == '/') {
+             p++;
+             indicator = 0;
+         }
+
+         // Find the first number (an empty number means 0, i.e. an open range start)
+         if (!parseTestNumber(p, '-', accnum)) {
+             printf("Ignoring malformed test spec '%s'\n", _argv[i]);
+             continue;
+         }
+         if (accnum >= NUMTESTS)
+             continue;
+         low = accnum;
+
+         // Check for range operator
+         if (*p == '-') {
+             p++;
+             // Find the second number (an empty number leaves the range open ended)
+             if (!parseTestNumber(p, '\0', accnum)) {
+                 printf("Ignoring malformed test spec '%s'\n", _argv[i]);
+                 continue;
+             }
+             if (accnum > 0)
+                 high = accnum < NUMTESTS ? accnum : NUMTESTS-1;
+         } else
+             high = low;
+
+         // Indicate the runrequest for the desired range.
+         // low is in [0, NUMTESTS) and high is at most NUMTESTS-1 here.
+         for (int j = low; j <= high; j++)
+             doTest[j] = indicator;
+     }
+
+     // Results are only verified for single runs; repeated runs are for timing.
+     if (multiplier > 1) {
+         printf("Running all tests %d times.\n", multiplier);
+         verify = false;
+     } else {
+         verify = true;
+     }
+
+     int testCnt = 0;
+
+     // init keyword extractor
+     _extractor = new search::docsummary::KeywordExtractor(NULL);
+     _extractor->AddLegalIndexSpec("*");
+
+     // Remember time
+     FastOS_Time timer;
+     timer.SetNow();
+
+     // Actually run the tests that we wanted.
+     for (int j = 0; j < multiplier; j++)
+         for (int k = 0; k < NUMTESTS; k++)
+             if (doTest[k] == 1) {
+                 if (!RunTest(k, verify))
+                     failed = true;
+                 testCnt++;
+             }
+
+     // Print time taken
+     double timeTaken = timer.MilliSecsToNow();
+
+     printf("Time taken : %f ms\n", timeTaken);
+     printf("Number of tests run: %d\n", testCnt);
+     double avgTestPrMSec = static_cast<double>(testCnt) / timeTaken;
+     printf("Tests pr Sec: %f\n", avgTestPrMSec * 1000.0);
+
+     delete _extractor;
+     _extractor = NULL;
+
+     return failed ? 1 : 0;
+ }
+
+ /**
+  * Compares two keyword lists and prints the verdict for one test.
+  *
+  * Both lists are sequences of NUL-terminated words, ended by an empty word.
+  *
+  * @param testNo  test number, used as prefix of the printed line
+  * @param actual  keyword list produced by the extractor
+  * @param correct expected keyword list
+  * @return true if the lists hold the same words in the same order
+  */
+ bool
+ ExtractKeywordsTest::ShowResult(int testNo,
+                                 const char *actual, const char *correct)
+ {
+     printf("%03d: ", testNo);
+
+     const char *got = actual;
+     const char *want = correct;
+     for (; *got != '\0'; ) {
+         if (strcmp(got, want) != 0) {
+             printf("fail. Keywords differ for act: %s, corr: %s\n",
+                    got, want);
+             return false;
+         }
+         // Step both cursors to the word following the terminating NUL.
+         got += strlen(got) + 1;
+         want += strlen(want) + 1;
+     }
+
+     // The actual list is exhausted; the expected list must be as well.
+     if (*want != '\0') {
+         printf("fail. actual list shorter than correct at %s\n", want);
+         return false;
+     }
+
+     printf("ok\n");
+     return true;
+ }
+
+/**
+ *
+ * @param testno The test to run.
+ * @param verify Verify the result of the test.
+ */
+bool
+ExtractKeywordsTest::RunTest(int testno, bool verify)
+{
+ search::SimpleQueryStack stack;
+ search::RawBuf buf(32768);
+ const char *correct = NULL;
+ const char *keywords = NULL;
+
+ switch (testno) {
+ case 0:
+ {
+ // Simple term query
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar"));
+
+ stack.AppendBuffer(&buf);
+ keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ correct = "foobar\0\0";
+
+ if (verify) ShowResult(testno, keywords, correct);
+ free(const_cast<char *>(keywords));
+ break;
+ }
+
+ case 1:
+ {
+ // multi term query
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 3));
+
+ stack.AppendBuffer(&buf);
+ keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ correct = "bar\0foo\0foobar\0\0";
+
+ if (verify) ShowResult(testno, keywords, correct);
+ free(const_cast<char *>(keywords));
+ break;
+ }
+
+ case 2:
+ {
+ // phrase term query
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3));
+
+ stack.AppendBuffer(&buf);
+ keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ correct = "bar foo foobar\0\0";
+
+ if (verify) ShowResult(testno, keywords, correct);
+ free(const_cast<char *>(keywords));
+ break;
+ }
+
+ case 3:
+ {
+ // multiple phrase and term query
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "xyzzy"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "xyz"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 2));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "baz"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "zog"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 3));
+
+ stack.AppendBuffer(&buf);
+ keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ correct = "zog\0baz\0bar foo foobar\0xyz xyzzy\0\0";
+
+ if (verify) ShowResult(testno, keywords, correct);
+ free(const_cast<char *>(keywords));
+ break;
+ }
+
+ case 4:
+ {
+ // phrase term query with wrong argument items
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 2));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 2));
+
+ stack.AppendBuffer(&buf);
+ keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ correct = "\0";
+
+ if (verify) ShowResult(testno, keywords, correct);
+ free(const_cast<char *>(keywords));
+ break;
+ }
+
+ default:
+ {
+ printf("%03d: no such test\n", testno);
+ return false;
+ }
+ }
+
+ bool result = true;
+ /*
+ if (verify) {
+ result = ShowResult(testno, pq->GetStack(), correct);
+ delete correct;
+ } else {
+ result = true;
+ }
+ delete pq;
+ */
+ return result;
+}
+
+ /**
+  * Prints the command line syntax and the valid test number range, then
+  * terminates the process with exit code -1 (this function does not return).
+  *
+  * Note: the '/spec' (exclude) and '*N' (repeat) argument forms handled by
+  * Main() are not mentioned in the printed text.
+  *
+  * @param progname program name shown at the start of the usage line
+  */
+ void
+ ExtractKeywordsTest::Usage(char *progname)
+ {
+ printf("%s {testnospec}+\n\
+ Where testnospec is:\n\
+ num: single test\n\
+ num-num: inclusive range (open range permitted)\n",progname);
+ printf("There are tests from %d to %d\n\n", 0, NUMTESTS-1);
+ exit(-1);
+ }
+
+ // Process entry point: hands the command line to the test application
+ // object through Entry() and returns its result as the exit status.
+ int
+ main(int argc, char** argv)
+ {
+ ExtractKeywordsTest tester;
+ return tester.Entry(argc, argv);
+ }
+
diff --git a/searchsummary/src/tests/extractkeywords/extractkeywordstest.h b/searchsummary/src/tests/extractkeywords/extractkeywordstest.h
new file mode 100644
index 00000000000..1a037fcb9cd
--- /dev/null
+++ b/searchsummary/src/tests/extractkeywords/extractkeywordstest.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+
+ // Forward declaration of the class used for the _extractor member below.
+ // The namespace must be spelled 'docsummary' to match the member's type;
+ // the earlier spelling 'docummary' declared an unrelated class.
+ namespace search {
+ namespace docsummary {
+ class KeywordExtractor;
+ }
+ }
+
+ /**
+  * Command line test application for the keyword extractor.
+  * Test selection and execution are implemented in Main().
+  */
+ class ExtractKeywordsTest : public FastOS_Application
+ {
+ public:
+     ExtractKeywordsTest()
+         : _extractor(NULL)
+     {
+     }
+
+ private:
+     // Copying is disabled: these are declared private and no definition
+     // is provided in this header.
+     ExtractKeywordsTest(const ExtractKeywordsTest &);
+     ExtractKeywordsTest& operator=(const ExtractKeywordsTest &);
+
+     int Main();
+     void Usage(char *progname);
+     bool ShowResult(int testNo, const char *actual, const char *correct);
+     bool RunTest(int i, bool verify);
+
+     // Extractor under test; created and destroyed inside Main().
+     search::docsummary::KeywordExtractor *_extractor;
+ };
+
diff --git a/searchsummary/src/tests/extractkeywords/runtests.sh b/searchsummary/src/tests/extractkeywords/runtests.sh
new file mode 100755
index 00000000000..2c09bb25460
--- /dev/null
+++ b/searchsummary/src/tests/extractkeywords/runtests.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#
+# $Id$
+#
+# Copyright (C) 2000-2003 Fast Search & Transfer ASA
+# Copyright (C) 2003 Overture Services Norway AS
+#
+# All Rights Reserved
+#
+
+if $VALGRIND ./searchsummary_extractkeywordstest_app -
+then
+ :
+else
+ echo FAILED: searchsummary_extractkeywordstest_app test failed
+ exit 1
+fi
+
+if $VALGRIND ./searchsummary_extractkeywordstest_app - '*1000'
+then
+ :
+else
+ echo FAILED: searchsummary_extractkeywordstest_app test failed
+ exit 1
+fi
+
+echo SUCCESS: searchsummary_extractkeywordstest_app test completed
+exit 0
diff --git a/searchsummary/src/tests/extractkeywords/testowner.ATS b/searchsummary/src/tests/extractkeywords/testowner.ATS
new file mode 100644
index 00000000000..6d03b0836a4
--- /dev/null
+++ b/searchsummary/src/tests/extractkeywords/testowner.ATS
@@ -0,0 +1 @@
+vlarsen
diff --git a/searchsummary/src/vespa/searchsummary/.gitignore b/searchsummary/src/vespa/searchsummary/.gitignore
new file mode 100644
index 00000000000..4ecafa4a29f
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/libsearchsummary.so.5.1
diff --git a/searchsummary/src/vespa/searchsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/CMakeLists.txt
new file mode 100644
index 00000000000..078c1b137a8
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ # The searchsummary library has no sources of its own: it is assembled from the
+ # object files of the searchsummary_config and searchsummary_docsummary targets.
+ vespa_add_library(searchsummary
+ SOURCES
+ $<TARGET_OBJECTS:searchsummary_config>
+ $<TARGET_OBJECTS:searchsummary_docsummary>
+ INSTALL lib64
+ DEPENDS
+ )
diff --git a/searchsummary/src/vespa/searchsummary/config/.gitignore b/searchsummary/src/vespa/searchsummary/config/.gitignore
new file mode 100644
index 00000000000..0d614ad8ec7
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/config/.gitignore
@@ -0,0 +1,5 @@
+*.So
+.depend
+Makefile
+config-*.cpp
+config-*.h
diff --git a/searchsummary/src/vespa/searchsummary/config/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/config/CMakeLists.txt
new file mode 100644
index 00000000000..5619b2f0a26
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/config/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ # Object library with no hand-written sources; its content is presumably the
+ # code generated from juniperrc.def by vespa_generate_config below.
+ vespa_add_library(searchsummary_config OBJECT
+ SOURCES
+ DEPENDS
+ )
+ vespa_generate_config(searchsummary_config juniperrc.def)
+ # The definition file itself is also installed for the config server.
+ install(FILES juniperrc.def DESTINATION var/db/vespa/config_server/serverdb/classes)
diff --git a/searchsummary/src/vespa/searchsummary/config/juniperrc.def b/searchsummary/src/vespa/searchsummary/config/juniperrc.def
new file mode 100644
index 00000000000..4a748b2f604
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/config/juniperrc.def
@@ -0,0 +1,78 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+namespace=vespa.config.search.summary
+
+## Set the length (in #characters) of the dynamically generated
+## summaries. This is a hint to the module that generates the
+## dynamic summary. The result may be slightly longer or shorter
+## depending on the structure of the available document text and
+## the submitted query.
+length int default=256
+
+ ## The number of (possibly partial) sets of keywords
+ ## matching the query to attempt to include in the summary. The larger this
+ ## value is set relative to the length parameter, the more
+ ## densely the keywords may appear in the summary.
+max_matches int default=3
+
+ ## Minimal desired length of the generated summary in
+ ## bytes. This is the shortest summary length for which the number of
+ ## matches will be respected. E.g. if
+ ## a summary appears to become shorter than min_length bytes with
+ ## max_matches matches, then additional matches will be used if available.
+min_length int default=128
+
+ ## Make sure the prefix (length controlled by 'juniper.dynsum.length')
+ ## of every field with summary: dynamic is returned in the dynamic
+ ## summary if a query does not hit in that field
+prefix bool default=true
+
+## The maximal number of bytes of context to prepend and append to
+## each of the selected query keyword hits. This parameter defines the
+## max size a summary would become if there are few keyword hits
+## (max_matches set low or document contained few matches of the keywords).
+surround_max int default=128
+
+## The size of the sliding window used to determine if
+## multiple query terms occur together. The larger the value, the more
+## likely the system will find (and present in dynamic summary) complete
+## matches containing all the search terms. The downside is a potential
+## performance overhead of keeping candidates for matches longer during
+ ## matching, and consequently updating more candidates that eventually
+ ## get thrown away.
+winsize int default=200
+
+## This value multiplied with the winsize gives the size of a fallback
+## window used to break out when searching for phrase term matches.
+winsize_fallback_multiplier double default=10.0
+
+## This value specifies the maximum number of match candidates that are
+## managed for a non-leaf query node when matching the query against the
+## input text.
+max_match_candidates int default=1000
+
+## The minimal number of bytes in a query keyword for
+## it to be subject to the simple Juniper stemming algorithm. Keywords
+## that are shorter than or equal to this limit will only yield exact
+## matches in the dynamic summaries.
+stem_min_length int default=5
+
+## The maximal number of bytes that a word in the document
+## can be longer than the keyword itself to yield a match. Eg. for
+## the default values, if the keyword is 7 bytes long, it will match any
+## word with length less than or equal to 10 for which the keyword is a prefix.
+stem_max_extend int default=3
+
+
+ ## The parameters above may also be overridden on a per-field basis
+ ## using the following array.
+override[].fieldname string
+override[].length int default=256
+override[].max_matches int default=3
+override[].min_length int default=128
+override[].prefix bool default=true
+override[].surround_max int default=128
+override[].winsize int default=200
+override[].winsize_fallback_multiplier double default=10.0
+override[].max_match_candidates int default=1000
+override[].stem_min_length int default=5
+override[].stem_max_extend int default=3
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/.gitignore b/searchsummary/src/vespa/searchsummary/docsummary/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
new file mode 100644
index 00000000000..bbd7dc1e177
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ # Object library holding the docsummary sources; listed AFTER searchsummary_config,
+ # presumably so the generated config code exists before these files are compiled.
+ vespa_add_library(searchsummary_docsummary OBJECT
+ SOURCES
+ resultclass.cpp
+ resultconfig.cpp
+ resultpacker.cpp
+ urlresult.cpp
+ getdocsumargs.cpp
+ docsumstate.cpp
+ docsumfieldwriter.cpp
+ docsumwriter.cpp
+ keywordextractor.cpp
+ attributedfw.cpp
+ dynamicteaserdfw.cpp
+ docsumconfig.cpp
+ rankfeaturesdfw.cpp
+ summaryfeaturesdfw.cpp
+ juniperproperties.cpp
+ textextractordfw.cpp
+ docsumformat.cpp
+ geoposdfw.cpp
+ tokenizer.cpp
+ positionsdfw.cpp
+ AFTER
+ searchsummary_config
+ )
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp
new file mode 100644
index 00000000000..4eef9e6a9c6
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp
@@ -0,0 +1,435 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+#include <vespa/searchsummary/docsummary/docsumwriter.h>
+#include <vespa/searchsummary/docsummary/attributedfw.h>
+#include <vespa/vespalib/tensor/tensor.h>
+#include <vespa/vespalib/tensor/serialization/slime_binary_format.h>
+
+LOG_SETUP(".searchlib.docsummary.attributedfw");
+
+using namespace search;
+using search::attribute::IAttributeContext;
+using search::attribute::IAttributeVector;
+using search::attribute::BasicType;
+
+namespace search {
+namespace docsummary {
+
+ /**
+  * Maps an attribute vector to the docsum result type used for it.
+  *
+  * Multi-value and string attributes map to RES_STRING. Integer attributes
+  * map by fixed width (1, 2, 4 bytes; anything else is RES_INT64), floating
+  * point attributes map to RES_FLOAT or RES_DOUBLE, and every other kind
+  * falls back to RES_STRING.
+  */
+ ResType inferType(const IAttributeVector & vec) {
+     if (vec.hasMultiValue() || vec.isStringType()) {
+         return RES_STRING;
+     }
+
+     const size_t width = vec.getFixedWidth();
+     if (vec.isIntegerType()) {
+         switch (width) {
+         case sizeof(uint8_t):  return RES_BYTE;
+         case sizeof(uint16_t): return RES_SHORT;
+         case sizeof(uint32_t): return RES_INT;
+         default:               return RES_INT64;
+         }
+     }
+     if (vec.isFloatingPointType()) {
+         if (width == sizeof(float)) {
+             return RES_FLOAT;
+         }
+         return RES_DOUBLE;
+     }
+     return RES_STRING;
+ }
+
+//-----------------------------------------------------------------------------
+
+ // Stores the name of the attribute this field writer is bound to.
+ AttrDFW::AttrDFW(const vespalib::string & attrName) :
+ _attrName(attrName)
+ {
+ }
+
+//-----------------------------------------------------------------------------
+
+ /**
+  * Docsum field writer for single-value attributes (the factory below
+  * creates it when the attribute reports no multi-value support).
+  */
+ class SingleAttrDFW : public AttrDFW
+ {
+ public:
+ SingleAttrDFW(const vespalib::string & attrName) :
+ AttrDFW(attrName)
+ { }
+ // Appends the binary form of the value to 'target'; returns bytes written.
+ virtual uint32_t WriteField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ RawBuf *target);
+ // Inserts the value into a slime structure through 'target'.
+ virtual void insertField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+ // True when the attribute reports the value for 'docid' as undefined.
+ virtual bool isDefaultValue(uint32_t docid, const GetDocsumsState * state) const;
+ };
+
+ namespace {
+
+ // Appends the raw bytes of a fixed-size value; returns the byte count.
+ template <typename T>
+ uint32_t
+ appendFixed(RawBuf *target, T val)
+ {
+     target->append(&val, sizeof(val));
+     return sizeof(val);
+ }
+
+ // Appends a length prefix of type LenT followed by 'len' payload bytes;
+ // returns the total byte count.
+ template <typename LenT>
+ uint32_t
+ appendPrefixed(RawBuf *target, const char *payload, LenT len)
+ {
+     target->append(&len, sizeof(len));
+     target->append(payload, len);
+     return (sizeof(len) + len);
+ }
+
+ }
+
+ /**
+  * Writes the attribute value of 'docid' to 'target' in the binary layout
+  * selected by 'type'.
+  *
+  * Numeric types are appended as raw fixed-size values. RES_STRING and
+  * RES_DATA get a 16-bit length prefix and at most 0xffff payload bytes.
+  * The long string/data types get a 32-bit length prefix. RES_JSONSTRING
+  * serializes tensor attributes as JSON and otherwise falls through to the
+  * long string handling.
+  *
+  * @return number of bytes appended to 'target'; 0 for unknown types
+  */
+ uint32_t
+ SingleAttrDFW::WriteField(uint32_t docid,
+                           GeneralResult *,
+                           GetDocsumsState * state,
+                           ResType type,
+                           RawBuf *target)
+ {
+     const IAttributeVector & attr = vec(*state);
+     switch (type) {
+     case RES_INT:
+         return appendFixed<uint32_t>(target, attr.getInt(docid));
+     case RES_SHORT:
+         return appendFixed<uint16_t>(target, attr.getInt(docid));
+     case RES_BYTE:
+         return appendFixed<uint8_t>(target, attr.getInt(docid));
+     case RES_FLOAT:
+         return appendFixed<float>(target, attr.getFloat(docid));
+     case RES_DOUBLE:
+         return appendFixed<double>(target, attr.getFloat(docid));
+     case RES_INT64:
+         return appendFixed<uint64_t>(target, attr.getInt(docid));
+     case RES_STRING:
+     case RES_DATA: {
+         // no need to pass in a buffer, this attribute has a string storage.
+         const char *text = attr.getString(docid, NULL, 0);
+         uint32_t fullLen = strlen(text);
+         // short strings carry a 16-bit length, so the payload is capped
+         uint16_t cappedLen = (fullLen < 0xffff) ? fullLen : 0xffff;
+         return appendPrefixed<uint16_t>(target, text, cappedLen);
+     }
+     case RES_JSONSTRING:
+         if (attr.getBasicType() == BasicType::TENSOR) {
+             const attribute::TensorAttribute &tensorAttr =
+                 static_cast<const attribute::TensorAttribute &>(attr);
+             const auto tensor = tensorAttr.getTensor(docid);
+             // stays empty when the document has no tensor value
+             vespalib::string json;
+             if (tensor) {
+                 auto slime =
+                     vespalib::tensor::SlimeBinaryFormat::serialize(*tensor);
+                 vespalib::slime::SimpleBuffer encoded;
+                 vespalib::slime::JsonFormat::encode(*slime, encoded, true);
+                 json = encoded.get().make_string();
+             } else {
+                 json = "";
+             }
+             uint32_t jsonLen = json.size();
+             return appendPrefixed<uint32_t>(target, json.c_str(), jsonLen);
+         }
+         /* FALLTHROUGH */
+     case RES_XMLSTRING:
+     case RES_FEATUREDATA:
+     case RES_LONG_STRING:
+     case RES_LONG_DATA: {
+         // no need to pass in a buffer, this attribute has a string storage.
+         const char *text = attr.getString(docid, NULL, 0);
+         uint32_t textLen = strlen(text);
+         return appendPrefixed<uint32_t>(target, text, textLen);
+     }
+     default:
+         return 0;
+     }
+ }
+
+bool SingleAttrDFW::isDefaultValue(uint32_t docid, const GetDocsumsState * state) const
+{
+ return vec(*state).isUndefined(docid);
+}
+
+ /**
+  * Inserts the attribute value of 'docid' into slime through 'target',
+  * using the slime value kind that matches 'type'.
+  *
+  * Integer values pass through the unsigned type of matching width before
+  * being inserted as long. Tensor attributes requested as RES_JSONSTRING
+  * are serialized as structured slime, and nothing is inserted when the
+  * document has no tensor. Unknown types insert nothing.
+  */
+ void
+ SingleAttrDFW::insertField(uint32_t docid,
+                            GeneralResult *,
+                            GetDocsumsState * state,
+                            ResType type,
+                            vespalib::slime::Inserter &target)
+ {
+     const IAttributeVector & attr = vec(*state);
+     switch (type) {
+     case RES_INT:
+         target.insertLong(static_cast<uint32_t>(attr.getInt(docid)));
+         break;
+     case RES_SHORT:
+         target.insertLong(static_cast<uint16_t>(attr.getInt(docid)));
+         break;
+     case RES_BYTE:
+         target.insertLong(static_cast<uint8_t>(attr.getInt(docid)));
+         break;
+     case RES_FLOAT:
+         target.insertDouble(static_cast<float>(attr.getFloat(docid)));
+         break;
+     case RES_DOUBLE:
+         target.insertDouble(static_cast<double>(attr.getFloat(docid)));
+         break;
+     case RES_INT64:
+         target.insertLong(static_cast<uint64_t>(attr.getInt(docid)));
+         break;
+     case RES_JSONSTRING:
+         if (attr.getBasicType() == BasicType::TENSOR) {
+             const attribute::TensorAttribute &tensorAttr =
+                 static_cast<const attribute::TensorAttribute &>(attr);
+             const auto tensor = tensorAttr.getTensor(docid);
+             if (tensor) {
+                 vespalib::tensor::SlimeBinaryFormat::serialize(target, *tensor);
+             }
+             // No tensor value => no object
+             return;
+         }
+         /* FALLTHROUGH */
+     case RES_XMLSTRING:
+     case RES_FEATUREDATA:
+     case RES_LONG_STRING:
+     case RES_STRING: {
+         // no need to pass in a buffer, this attribute has a string storage.
+         const char *text = attr.getString(docid, NULL, 0);
+         target.insertString(vespalib::slime::Memory(text));
+         break;
+     }
+     case RES_LONG_DATA:
+     case RES_DATA: {
+         // no need to pass in a buffer, this attribute has a string storage.
+         const char *raw = attr.getString(docid, NULL, 0);
+         target.insertData(vespalib::slime::Memory(raw));
+         break;
+     }
+     default:
+         // unknown type, will be missing, should not happen
+         return;
+     }
+ }
+
+
+//-----------------------------------------------------------------------------
+
+ /**
+  * Docsum field writer for multi-value attributes (the factory below
+  * creates it when the attribute reports multi-value support).
+  */
+ class MultiAttrDFW : public AttrDFW
+ {
+ public:
+ MultiAttrDFW(const vespalib::string & attrName) : AttrDFW(attrName) {}
+ // Appends the values as a length-prefixed JSON array string; returns bytes written.
+ virtual uint32_t WriteField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ RawBuf *target);
+ // Inserts the values as a slime array through 'target'.
+ virtual void insertField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+
+ };
+
+ /**
+  * Writes all values of a multi-value attribute for 'docid' as one
+  * length-prefixed JSON array string.
+  *
+  * Weighted set attributes are written as an array of [value, weight]
+  * pairs, other collections as a flat array of strings. The length prefix
+  * is 32 bits when 'type' is binary compatible with RES_LONG_STRING and
+  * 16 bits otherwise. If the payload does not fit a 16-bit prefix, the
+  * field content is replaced by the text "***OVERFLOW***".
+  *
+  * @return number of bytes appended to 'target', including the length prefix
+  */
+ uint32_t
+ MultiAttrDFW::WriteField(uint32_t docid,
+                          GeneralResult *,
+                          GetDocsumsState * state,
+                          ResType type,
+                          RawBuf *target)
+ {
+     bool isLong = IsBinaryCompatible(type, RES_LONG_STRING);
+     uint32_t written = 0;
+     uint16_t str_len_16 = 0;
+     uint32_t str_len_32 = 0;
+     // remember where the length prefix goes; it is patched in afterwards
+     int str_len_ofs = target->GetUsedLen();
+     vespalib::JSONStringer & jsonStr = state->_jsonStringer;
+
+     // reserve room for the length prefix with a zero placeholder
+     if (isLong) {
+         target->append(&str_len_32, sizeof(str_len_32));
+     } else {
+         target->append(&str_len_16, sizeof(str_len_16));
+     }
+     const IAttributeVector & v = vec(*state);
+     uint32_t entries = v.getValueCount(docid);
+     {
+         std::vector<IAttributeVector::WeightedString> elements(entries);
+         // data() is valid for an empty vector, whereas &elements[0] is
+         // undefined behaviour when the document has no values.
+         entries = std::min(entries, v.get(docid, elements.data(), entries));
+         jsonStr.clear();
+         jsonStr.beginArray();
+         for (uint32_t i = 0; i < entries; ++i) {
+             if (v.hasWeightedSetType()) {
+                 jsonStr.beginArray();
+                 jsonStr.appendString(elements[i].getValue());
+                 jsonStr.appendInt64(elements[i].getWeight());
+                 jsonStr.endArray();
+             } else {
+                 jsonStr.appendString(elements[i].getValue());
+             }
+         }
+         jsonStr.endArray();
+         (*target) += jsonStr.toString().c_str();
+         jsonStr.clear();
+     }
+
+     // calculate number of bytes written
+     written = target->GetUsedLen() - str_len_ofs;
+
+     // patch in correct field length
+     if (isLong) {
+         str_len_32 = written - sizeof(str_len_32);
+         memcpy(target->GetWritableDrainPos(str_len_ofs),
+                &str_len_32, sizeof(str_len_32));
+     } else {
+         // The assignment truncates to 16 bits; a mismatch with the full
+         // value means the payload is too long for the short prefix.
+         str_len_16 = written - sizeof(str_len_16);
+         if (str_len_16 != written - sizeof(str_len_16)) {
+             target->truncate(str_len_ofs);
+             str_len_16 = 0;
+             target->append(&str_len_16, sizeof(uint16_t));
+             *target += "***OVERFLOW***";
+             written = target->GetUsedLen() - str_len_ofs;
+             str_len_16 = written - sizeof(uint16_t);
+             assert(str_len_16 == written - sizeof(uint16_t));
+         }
+         memcpy(target->GetWritableDrainPos(str_len_ofs),
+                &str_len_16, sizeof(str_len_16));
+     }
+     return written;
+ }
+
+ /**
+  * Inserts all values of a multi-value attribute for 'docid' as a slime
+  * array through 'target'.
+  *
+  * The element kind follows the attribute's basic type: strings, longs or
+  * doubles. For weighted sets each element is an object with the fields
+  * "item" and "weight"; otherwise the plain value is added. An unsupported
+  * basic type is logged and trips LOG_ASSERT.
+  */
+ void
+ MultiAttrDFW::insertField(uint32_t docid,
+                           GeneralResult *,
+                           GetDocsumsState *state,
+                           ResType,
+                           vespalib::slime::Inserter &target)
+ {
+     using vespalib::slime::Cursor;
+     using vespalib::slime::Memory;
+     const IAttributeVector & v = vec(*state);
+     uint32_t entries = v.getValueCount(docid);
+     bool isWeightedSet = v.hasWeightedSetType();
+
+     Cursor &arr = target.insertArray();
+     BasicType::Type t = v.getBasicType();
+     // In every branch below data() is used instead of &elements[0]:
+     // indexing an empty vector (document without values) is undefined.
+     switch (t) {
+     case BasicType::NONE:
+     case BasicType::STRING: {
+         std::vector<IAttributeVector::WeightedString> elements(entries);
+         entries = std::min(entries, v.get(docid, elements.data(), entries));
+         for (uint32_t i = 0; i < entries; ++i) {
+             const vespalib::string &sv = elements[i].getValue();
+             Memory value(sv.c_str(), sv.size());
+             if (isWeightedSet) {
+                 Cursor &elem = arr.addObject();
+                 elem.setString("item", value);
+                 elem.setLong("weight", elements[i].getWeight());
+             } else {
+                 arr.addString(value);
+             }
+         }
+         return; }
+     case BasicType::UINT1:
+     case BasicType::UINT2:
+     case BasicType::UINT4:
+     case BasicType::INT8:
+     case BasicType::INT16:
+     case BasicType::INT32:
+     case BasicType::INT64: {
+         std::vector<IAttributeVector::WeightedInt> elements(entries);
+         entries = std::min(entries, v.get(docid, elements.data(), entries));
+         for (uint32_t i = 0; i < entries; ++i) {
+             if (isWeightedSet) {
+                 Cursor &elem = arr.addObject();
+                 elem.setLong("item", elements[i].getValue());
+                 elem.setLong("weight", elements[i].getWeight());
+             } else {
+                 arr.addLong(elements[i].getValue());
+             }
+         }
+         return; }
+     case BasicType::FLOAT:
+     case BasicType::DOUBLE: {
+         std::vector<IAttributeVector::WeightedFloat> elements(entries);
+         entries = std::min(entries, v.get(docid, elements.data(), entries));
+         for (uint32_t i = 0; i < entries; ++i) {
+             if (isWeightedSet) {
+                 Cursor &elem = arr.addObject();
+                 elem.setDouble("item", elements[i].getValue());
+                 elem.setLong("weight", elements[i].getWeight());
+             } else {
+                 arr.addDouble(elements[i].getValue());
+             }
+         }
+         return; }
+     default:
+         // should not happen
+         LOG(error, "bad value for type: %u\n", t);
+         LOG_ASSERT(false);
+     }
+ }
+
+//-----------------------------------------------------------------------------
+
+ /**
+  * Creates the field writer matching the named attribute.
+  *
+  * @param vecMan  attribute manager used to look up the attribute
+  * @param vecName name of the attribute
+  * @return a newly allocated MultiAttrDFW or SingleAttrDFW depending on
+  *         whether the attribute is multi-value, or NULL (after logging a
+  *         warning) when the attribute is not found
+  */
+ IDocsumFieldWriter *
+ AttributeDFWFactory::create(IAttributeManager & vecMan, const char *vecName)
+ {
+     IAttributeContext::UP context = vecMan.createContext();
+     const IAttributeVector * attr = context->getAttribute(vecName);
+     if (attr == NULL) {
+         LOG(warning, "No valid attribute vector found: %s", vecName);
+         return NULL;
+     }
+     if (!attr->hasMultiValue()) {
+         return new SingleAttrDFW(attr->getName());
+     }
+     return new MultiAttrDFW(attr->getName());
+ }
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.h b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.h
new file mode 100644
index 00000000000..b8ac9b30510
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+
+namespace search {
+namespace docsummary {
+
+ /**
+  * Base class for docsum field writers that take their value from an
+  * attribute vector. Holds the attribute name and gives subclasses access
+  * to the attribute through the docsum state.
+  */
+ class AttrDFW : public IDocsumFieldWriter
+ {
+ private:
+ vespalib::string _attrName;
+ protected:
+ // Returns the attribute that the state holds at this writer's index.
+ // The pointer from getAttribute() is dereferenced without a null check.
+ const attribute::IAttributeVector & vec(const GetDocsumsState & s) const {
+ return *s.getAttribute(getIndex());
+ }
+ virtual const vespalib::string & getAttributeName() const { return _attrName; }
+ public:
+ AttrDFW(const vespalib::string & attrName);
+ // Always reports true for attribute based writers.
+ virtual bool IsGenerated() const { return true; }
+ };
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp
new file mode 100644
index 00000000000..18d383d5bfd
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchsummary/docsummary/docsumconfig.h>
+#include <vespa/searchsummary/docsummary/rankfeaturesdfw.h>
+#include <vespa/searchsummary/docsummary/summaryfeaturesdfw.h>
+#include <vespa/searchsummary/docsummary/textextractordfw.h>
+#include <vespa/searchsummary/docsummary/geoposdfw.h>
+#include <vespa/searchsummary/docsummary/positionsdfw.h>
+#include <vespa/searchsummary/docsummary/juniperdfw.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+
+LOG_SETUP(".searchlib.docsummary.docsumconfig");
+
+
+namespace search {
+namespace docsummary {
+
+using vespalib::IllegalArgumentException;
+using vespalib::make_string;
+
+/**
+ * Creates the field writer that implements a summarymap override command.
+ *
+ * @param fieldName    name of the summary field being overridden
+ * @param overrideName override command: dynamicteaser, textextractor,
+ *                     summaryfeatures, rankfeatures, empty, copy, absdist,
+ *                     positions, geopos or attribute
+ * @param argument     command argument; must be non-empty for dynamicteaser,
+ *                     textextractor and copy
+ * @param rc           set to true only when the writer was created and
+ *                     initialized successfully
+ * @return the created writer; it may be empty, or non-empty with rc == false
+ *         when initialization of the writer failed
+ * @throws IllegalArgumentException when a required argument is missing or
+ *         the override command is unknown
+ **/
+IDocsumFieldWriter::UP
+DynamicDocsumConfig::createFieldWriter(const string & fieldName, const string & overrideName, const string & argument, bool & rc)
+{
+    const ResultConfig & resultConfig = getResultConfig();
+    rc = false;
+    IDocsumFieldWriter::UP fieldWriter;
+    if (overrideName == "dynamicteaser") {
+        if (argument.empty()) {
+            throw IllegalArgumentException("Missing argument");
+        }
+        // The teaser writer needs juniper from the environment. Without an
+        // environment, report failure through rc (as the attribute based
+        // commands below do) instead of dereferencing a NULL pointer.
+        if (getEnvironment()) {
+            const char *langFieldName = "something unused";
+            DynamicTeaserDFW *fw = new DynamicTeaserDFW(getEnvironment()->getJuniper());
+            fieldWriter.reset(fw);
+            rc = fw->Init(fieldName.c_str(), langFieldName, resultConfig, argument.c_str());
+        }
+    } else if (overrideName == "textextractor") {
+        if (argument.empty()) {
+            throw IllegalArgumentException("Missing argument");
+        }
+        TextExtractorDFW * fw = new TextExtractorDFW();
+        fieldWriter.reset(fw);
+        rc = fw->init(fieldName, argument, resultConfig);
+    } else if (overrideName == "summaryfeatures") {
+        SummaryFeaturesDFW *fw = new SummaryFeaturesDFW();
+        fieldWriter.reset(fw);
+        fw->init(getEnvironment());
+        rc = true;
+    } else if (overrideName == "rankfeatures") {
+        RankFeaturesDFW * fw = new RankFeaturesDFW();
+        // Hand the object to the owning pointer before initializing it, so
+        // it is not leaked should init() throw.
+        fieldWriter.reset(fw);
+        fw->init(getEnvironment());
+        rc = true;
+    } else if (overrideName == "empty") {
+        fieldWriter.reset(new EmptyDFW());
+        rc = true;
+    } else if (overrideName == "copy") {
+        if (argument.empty()) {
+            throw IllegalArgumentException("Missing argument");
+        }
+        CopyDFW *fw = new CopyDFW();
+        fieldWriter.reset(fw);
+        rc = fw->Init(resultConfig, argument.c_str());
+    } else if (overrideName == "absdist") {
+        if (getEnvironment()) {
+            IAttributeManager *am = getEnvironment()->getAttributeManager();
+            fieldWriter = createAbsDistanceDFW(argument.c_str(), am);
+            rc = (fieldWriter.get() != NULL);
+        }
+    } else if (overrideName == "positions") {
+        if (getEnvironment()) {
+            IAttributeManager *am = getEnvironment()->getAttributeManager();
+            fieldWriter = createPositionsDFW(argument.c_str(), am);
+            rc = (fieldWriter.get() != NULL);
+        }
+    } else if (overrideName == "geopos") {
+        if (getEnvironment()) {
+            IAttributeManager *am = getEnvironment()->getAttributeManager();
+            fieldWriter = GeoPositionDFW::create(argument.c_str(), am);
+            rc = (fieldWriter.get() != NULL);
+        }
+    } else if (overrideName == "attribute") {
+        const char *vectorName = argument.c_str();
+        if (getEnvironment() && getEnvironment()->getAttributeManager()) {
+            IDocsumFieldWriter *fw = AttributeDFWFactory::create(*getEnvironment()->getAttributeManager(), vectorName);
+            fieldWriter.reset(fw);
+            rc = fw != NULL;
+        }
+    } else {
+        throw IllegalArgumentException("unknown override operation '" + overrideName + "' for field '" + fieldName + "'.");
+    }
+    return fieldWriter;
+}
+
+/**
+ * Applies a summarymap config to the docsum writer.
+ *
+ * Sets the default output class when one is configured (any value other
+ * than -1), then creates a field writer for every configured override and
+ * registers it with the docsum writer.
+ *
+ * @param cfg summarymap config to apply
+ * @throws IllegalArgumentException when the default output class cannot be
+ *         set, or when an override writer cannot be created, initialized
+ *         or registered
+ **/
+void
+DynamicDocsumConfig::configure(const vespa::config::search::SummarymapConfig &cfg)
+{
+    if ((cfg.defaultoutputclass != -1) && !_writer->SetDefaultOutputClass(cfg.defaultoutputclass)) {
+        throw IllegalArgumentException(make_string("could not set default output class to %d", cfg.defaultoutputclass));
+    }
+    for (size_t i = 0; i < cfg.override.size(); ++i) {
+        const vespa::config::search::SummarymapConfig::Override & o = cfg.override[i];
+        bool rc(false);
+        IDocsumFieldWriter::UP fieldWriter = createFieldWriter(o.field, o.command, o.arguments, rc);
+        if (rc && fieldWriter.get() != NULL) {
+            rc = _writer->Override(o.field.c_str(), fieldWriter.release()); // OBJECT HAND-OVER
+        }
+        if (!rc) {
+            throw IllegalArgumentException(o.command + " override operation failed during initialization");
+        }
+    }
+}
+
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.h
new file mode 100644
index 00000000000..04f2890e7c1
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/docsumwriter.h>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/searchsummary/docsummary/idocsumenvironment.h>
+#include <vespa/config-summarymap.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Configures a DynamicDocsumWriter from a summarymap config by creating one
+ * field writer per configured override. Subclasses may override
+ * createFieldWriter() to change how writers are created.
+ **/
+class DynamicDocsumConfig
+{
+public:
+    DynamicDocsumConfig(IDocsumEnvironment * env, DynamicDocsumWriter * writer)
+        : _env(env), _writer(writer)
+    { }
+    virtual ~DynamicDocsumConfig() { }
+
+    void configure(const vespa::config::search::SummarymapConfig &cfg);
+
+protected:
+    using string = vespalib::string;
+
+    IDocsumEnvironment * getEnvironment() { return _env; }
+    const IDocsumEnvironment * getEnvironment() const { return _env; }
+    const ResultConfig & getResultConfig() const { return *_writer->GetResultConfig(); }
+
+    // Creates the writer for a single override; rc reports whether the
+    // writer was created and initialized successfully.
+    virtual IDocsumFieldWriter::UP
+    createFieldWriter(const string & fieldName, const string & overrideName,
+                      const string & argument, bool & rc);
+
+private:
+    IDocsumEnvironment  * _env;
+    DynamicDocsumWriter * _writer;
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp
new file mode 100644
index 00000000000..fa379397476
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp
@@ -0,0 +1,286 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <math.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/common/documentlocations.h>
+#include <vespa/searchlib/common/location.h>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/searchsummary/docsummary/idocsumenvironment.h>
+#include <vespa/searchsummary/docsummary/docsumformat.h>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+
+LOG_SETUP(".searchlib.docsummary.docsumfieldwriter");
+
+namespace search {
+namespace docsummary {
+
+using search::attribute::IAttributeContext;
+using search::attribute::IAttributeVector;
+using search::attribute::BasicType;
+using search::common::Location;
+
+//--------------------------------------------------------------------------
+
+const vespalib::string IDocsumFieldWriter::_empty("");
+
+//--------------------------------------------------------------------------
+
+// EmptyDFW holds no state of its own, so construction and destruction
+// have nothing to do.
+EmptyDFW::EmptyDFW() = default;
+
+
+EmptyDFW::~EmptyDFW() = default;
+
+/**
+ * Slime variant of the empty writer: inserts nothing into the target.
+ **/
+void
+EmptyDFW::insertField(uint32_t /*docid*/,
+                      GeneralResult *,
+                      GetDocsumsState *,
+                      ResType,
+                      vespalib::slime::Inserter & /*target*/)
+{
+    // Open question kept from the original code: should an explicitly
+    // empty field be inserted here instead (target.insertNix())?
+}
+
+/**
+ * Binary variant of the empty writer: appends the empty representation of
+ * the given field type to the target buffer.
+ *
+ * @return number of bytes appended
+ **/
+uint32_t
+EmptyDFW::WriteField(uint32_t /*docid*/,
+                     GeneralResult * /*gres*/,
+                     GetDocsumsState * /*state*/,
+                     ResType type,
+                     search::RawBuf *target)
+{
+    search::RawBuf &out = *target;
+    return DocsumFormat::addEmpty(type, out);
+}
+
+//--------------------------------------------------------------------------
+
+// Until Init() has resolved the input field, the enum value is the
+// all-ones value.
+CopyDFW::CopyDFW()
+    : _inputFieldEnumValue(0xffffffffu)
+{
+}
+
+
+CopyDFW::~CopyDFW() = default;
+
+
+/**
+ * Resolves the input field to copy from and checks that its type can be
+ * copied in every result class that contains it.
+ *
+ * A field name unknown to the result config only produces a warning.
+ *
+ * @param config     result config used to resolve the field name
+ * @param inputField name of the field to copy from
+ * @return false if some result class has the input field with a type that
+ *         is not runtime compatible with int, double, int64, string or
+ *         data; true otherwise
+ **/
+bool
+CopyDFW::Init(const ResultConfig & config, const char *inputField)
+{
+    _inputFieldEnumValue = config.GetFieldNameEnum().Lookup(inputField);
+
+    if (_inputFieldEnumValue >= config.GetFieldNameEnum().GetNumEntries()) {
+        LOG(warning, "no docsum format contains field '%s'; copied fields will be empty", inputField);
+    }
+
+    const ResultConfig::const_iterator last(config.end());
+    for (ResultConfig::const_iterator cls(config.begin()); cls != last; ++cls) {
+        const int fieldIdx = cls->GetIndexFromEnumValue(_inputFieldEnumValue);
+        const ResConfigEntry *entry = cls->GetEntry(fieldIdx);
+        if (entry == NULL) {
+            continue; // this result class does not contain the input field
+        }
+        const bool copyable = IsRuntimeCompatible(entry->_type, RES_INT)
+                           || IsRuntimeCompatible(entry->_type, RES_DOUBLE)
+                           || IsRuntimeCompatible(entry->_type, RES_INT64)
+                           || IsRuntimeCompatible(entry->_type, RES_STRING)
+                           || IsRuntimeCompatible(entry->_type, RES_DATA);
+        if (!copyable) {
+            LOG(warning, "cannot use docsum field '%s' as input to copy; type conflict with result class %d (%s)",
+                inputField, cls->GetClassID(), cls->GetClassName());
+            return false;
+        }
+    }
+    return true;
+}
+
+
+/**
+ * Copies the value of the input field into the slime target.
+ *
+ * Nothing is inserted when the input field is missing from the result or
+ * when its type is not runtime compatible with the requested type.
+ *
+ * @param gres   unpacked result holding the input field
+ * @param state  request state; its field space buffer is used when
+ *               resolving string and data fields
+ * @param type   type of the output field
+ * @param target inserter receiving the copied value
+ **/
+void
+CopyDFW::insertField(uint32_t /*docid*/,
+                     GeneralResult *gres,
+                     GetDocsumsState *state,
+                     ResType type,
+                     vespalib::slime::Inserter &target)
+{
+    int inputIdx = gres->GetClass()->GetIndexFromEnumValue(_inputFieldEnumValue);
+    ResEntry *source = gres->GetEntry(inputIdx);
+
+    if (source == NULL || !IsRuntimeCompatible(source->_type, type)) {
+        return; // nothing to copy
+    }
+
+    switch (type) {
+    // Numeric values are narrowed to the width of the output type before
+    // they are inserted.
+    case RES_INT:
+        target.insertLong(static_cast<uint32_t>(source->_intval));
+        break;
+
+    case RES_SHORT:
+        target.insertLong(static_cast<uint16_t>(source->_intval));
+        break;
+
+    case RES_BYTE:
+        target.insertLong(static_cast<uint8_t>(source->_intval));
+        break;
+
+    case RES_FLOAT:
+        target.insertDouble(static_cast<float>(source->_doubleval));
+        break;
+
+    case RES_DOUBLE:
+        target.insertDouble(static_cast<double>(source->_doubleval));
+        break;
+
+    case RES_INT64:
+        target.insertLong(static_cast<uint64_t>(source->_int64val));
+        break;
+
+    case RES_XMLSTRING:
+    case RES_JSONSTRING:
+    case RES_FEATUREDATA:
+    case RES_LONG_STRING:
+    case RES_STRING: {
+        uint32_t textLen;
+        const char *text;
+        // resolve the field content, then insert it as a string
+        source->_resolve_field(&text, &textLen, &state->_docSumFieldSpace);
+        vespalib::slime::Memory asString(text, textLen);
+        target.insertString(asString);
+        break; }
+
+    case RES_LONG_DATA:
+    case RES_DATA: {
+        uint32_t dataLen;
+        const char *data;
+        // resolve the field content, then insert it as raw data
+        source->_resolve_field(&data, &dataLen, &state->_docSumFieldSpace);
+        vespalib::slime::Memory asData(data, dataLen);
+        target.insertData(asData);
+        break; }
+    }
+}
+
+/**
+ * Copies the value of the input field to the binary docsum target.
+ *
+ * When the input field is missing from the result, or its type is not
+ * runtime compatible with the requested type, the empty representation of
+ * the requested type is written instead.
+ *
+ * @param gres   unpacked result holding the input field
+ * @param state  request state; its field space buffer is used when
+ *               resolving short string and data fields
+ * @param type   type of the output field
+ * @param target buffer receiving the copied value
+ * @return number of bytes appended to the target
+ **/
+uint32_t
+CopyDFW::WriteField(uint32_t /*docid*/,
+                    GeneralResult *gres,
+                    GetDocsumsState *state,
+                    ResType type,
+                    search::RawBuf *target)
+{
+    int inputIdx = gres->GetClass()->GetIndexFromEnumValue(_inputFieldEnumValue);
+    ResEntry *source = gres->GetEntry(inputIdx);
+
+    DocsumFormat::Appender out(*target);
+
+    if (source == NULL || !IsRuntimeCompatible(source->_type, type)) {
+        // insert empty field
+        return out.addEmpty(type);
+    }
+
+    uint32_t bytes = 0;
+
+    switch (type) {
+
+    case RES_INT:
+        bytes = out.addInt32(source->_intval);
+        break;
+
+    case RES_SHORT:
+        bytes = out.addShort(source->_intval);
+        break;
+
+    case RES_BYTE:
+        bytes = out.addByte(source->_intval);
+        break;
+
+    case RES_FLOAT:
+        bytes = out.addFloat(source->_doubleval);
+        break;
+
+    case RES_DOUBLE:
+        bytes = out.addDouble(source->_doubleval);
+        break;
+
+    case RES_INT64:
+        bytes = out.addInt64(source->_int64val);
+        break;
+
+    // Short strings and short data are written the same way: resolve the
+    // field content and append it as short data.
+    case RES_STRING:
+    case RES_DATA: {
+        uint32_t contentLen;
+        const char *content;
+        source->_resolve_field(&content, &contentLen, &state->_docSumFieldSpace);
+        bytes = out.addShortData(content, contentLen);
+        break; }
+
+    case RES_XMLSTRING:
+    case RES_JSONSTRING:
+    case RES_FEATUREDATA:
+    case RES_LONG_STRING: {
+        uint32_t lengthField = source->_len;
+        uint32_t textLen = source->_get_length();
+
+        // the length field is copied verbatim to preserve the compression flag
+        target->append(&lengthField, sizeof(lengthField));
+        target->append(source->_stringval, textLen);
+        bytes = sizeof(lengthField) + textLen;
+        break; }
+
+    case RES_LONG_DATA: {
+        uint32_t lengthField = source->_len;
+        uint32_t dataLen = source->_get_length();
+
+        // the length field is copied verbatim to preserve the compression flag
+        target->append(&lengthField, sizeof(lengthField));
+        target->append(source->_dataval, dataLen);
+        bytes = sizeof(lengthField) + dataLen;
+        break; }
+    }
+
+    return bytes;
+}
+
+//--------------------------------------------------------------------------
+
+} // namespace docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.h
new file mode 100644
index 00000000000..4986697c5bd
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.h
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+#include <vespa/searchsummary/docsummary/docsumstate.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+#include <vespa/vespalib/data/slime/inserter.h>
+
+namespace search {
+namespace docsummary {
+
+
+using search::IAttributeManager;
+
+/**
+ * Interface for objects producing the value of a single docsum field,
+ * either in the binary docsum format (WriteField) or as slime
+ * (insertField).
+ **/
+class IDocsumFieldWriter
+{
+public:
+    using UP = std::unique_ptr<IDocsumFieldWriter>;
+
+    IDocsumFieldWriter() : _index(0) { }
+    virtual ~IDocsumFieldWriter() {}
+
+    // Convenience forwards to the compatibility checks in ResultConfig.
+    static bool IsBinaryCompatible(ResType a, ResType b) {
+        return ResultConfig::IsBinaryCompatible(a, b);
+    }
+    static bool IsRuntimeCompatible(ResType a, ResType b) {
+        return ResultConfig::IsRuntimeCompatible(a, b);
+    }
+
+    // NOTE(review): presumably tells whether the field is produced without
+    // reading the stored docsum field; confirm against the docsum writer.
+    virtual bool IsGenerated() const = 0;
+
+    // Appends the field value to the binary target buffer; the
+    // implementations in this file return the number of bytes appended.
+    virtual uint32_t WriteField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state,
+                                ResType type, search::RawBuf *target) = 0;
+
+    // Inserts the field value into the slime target.
+    virtual void insertField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state,
+                             ResType type, vespalib::slime::Inserter &target) = 0;
+
+    // Name of the attribute backing this writer; the empty string by default.
+    virtual const vespalib::string & getAttributeName() const { return _empty; }
+
+    // Default implementation ignores its arguments and answers false.
+    virtual bool isDefaultValue(uint32_t /*docid*/, const GetDocsumsState * /*state*/) const {
+        return false;
+    }
+
+    void setIndex(size_t v) { _index = v; }
+    size_t getIndex() const { return _index; }
+
+private:
+    size_t _index;
+    static const vespalib::string _empty;
+};
+
+//--------------------------------------------------------------------------
+
+/**
+ * Field writer that always produces an empty field: the binary variant
+ * writes the empty representation of the field type, the slime variant
+ * inserts nothing.
+ **/
+class EmptyDFW : public IDocsumFieldWriter
+{
+public:
+    EmptyDFW();
+    virtual ~EmptyDFW();
+
+    virtual bool IsGenerated() const { return true; }
+
+    virtual uint32_t WriteField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state,
+                                ResType type, search::RawBuf *target);
+
+    virtual void insertField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state,
+                             ResType type, vespalib::slime::Inserter &target);
+};
+
+//--------------------------------------------------------------------------
+
+/**
+ * Field writer that copies the value of another docsum field (the input
+ * field chosen with Init()) into the field it is registered for.
+ **/
+class CopyDFW : public IDocsumFieldWriter
+{
+public:
+    CopyDFW();
+    virtual ~CopyDFW();
+
+    // Resolves the input field name against the result config; returns
+    // false when the field has a type that cannot be copied.
+    bool Init(const ResultConfig & config, const char *inputField);
+
+    virtual bool IsGenerated() const { return false; }
+
+    virtual uint32_t WriteField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state,
+                                ResType type, search::RawBuf *target);
+
+    virtual void insertField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state,
+                             ResType type, vespalib::slime::Inserter &target);
+
+private:
+    uint32_t _inputFieldEnumValue; // enum value of the input field name
+};
+
+//--------------------------------------------------------------------------
+
+/**
+ * Factory for attribute based field writers. Only the static create()
+ * function is usable; the constructor is private.
+ **/
+class AttributeDFWFactory
+{
+public:
+    static IDocsumFieldWriter *create(IAttributeManager & vecMan, const char *vecName);
+
+private:
+    AttributeDFWFactory();
+};
+
+} // namespace docsummary
+} // namespace search
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp
new file mode 100644
index 00000000000..a837fca3bdb
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchsummary/docsummary/docsumformat.h>
+
+namespace search {
+namespace docsummary {
+
+LOG_SETUP(".searchlib.docsummary.docsumformat");
+
+
+namespace {
+
+// Appends the in-memory bytes of a value to the buffer and returns how
+// many bytes were added.
+template <typename T>
+size_t
+appendValue(search::RawBuf &target, const T &value)
+{
+    target.append(&value, sizeof(T));
+    return sizeof(T);
+}
+
+}
+
+// The fixed-width adders below each append one value of the named type
+// and return its size in bytes.
+
+size_t
+DocsumFormat::addByte(search::RawBuf &target, uint8_t value)
+{
+    return appendValue<uint8_t>(target, value);
+}
+
+size_t
+DocsumFormat::addShort(search::RawBuf &target, uint16_t value)
+{
+    return appendValue<uint16_t>(target, value);
+}
+
+size_t
+DocsumFormat::addInt32(search::RawBuf &target, uint32_t value)
+{
+    return appendValue<uint32_t>(target, value);
+}
+
+size_t
+DocsumFormat::addFloat(search::RawBuf &target, float value)
+{
+    return appendValue<float>(target, value);
+}
+
+size_t
+DocsumFormat::addDouble(search::RawBuf &target, double value)
+{
+    return appendValue<double>(target, value);
+}
+
+size_t
+DocsumFormat::addInt64(search::RawBuf &target, uint64_t value)
+{
+    return appendValue<uint64_t>(target, value);
+}
+
+/**
+ * Appends a 16-bit length followed by that many bytes from buf. Input
+ * longer than 0xffff bytes is truncated to 0xffff bytes.
+ *
+ * @return number of bytes appended (length field plus content)
+ **/
+size_t
+DocsumFormat::addShortData(search::RawBuf &target, const char *buf, uint32_t buflen)
+{
+    uint16_t storedLen = 0xffff;
+    if (buflen <= 0xffff) {
+        storedLen = static_cast<uint16_t>(buflen);
+    }
+    target.append(&storedLen, sizeof(storedLen));
+    target.append(buf, storedLen);
+
+    return sizeof(storedLen) + storedLen;
+}
+
+/**
+ * Appends a 32-bit length followed by buflen bytes from buf.
+ *
+ * @return number of bytes appended (length field plus content)
+ **/
+size_t
+DocsumFormat::addLongData(search::RawBuf &target, const char *buf, uint32_t buflen)
+{
+    const size_t headerSize = sizeof(buflen);
+    target.append(&buflen, headerSize);
+    target.append(buf, buflen);
+
+    return headerSize + buflen;
+}
+
+/**
+ * Appends the empty representation of the given field type: a zero value
+ * for numeric types and zero-length data for string and data types.
+ *
+ * @return number of bytes appended; 0 when the type is not handled
+ **/
+size_t
+DocsumFormat::addEmpty(ResType type, search::RawBuf &target)
+{
+    switch (type) {
+    // variable-length types with a 32-bit length
+    case RES_LONG_STRING:
+    case RES_LONG_DATA:
+    case RES_XMLSTRING:
+    case RES_JSONSTRING:
+    case RES_FEATUREDATA:
+        return addLongData(target, "", 0);
+    // variable-length types with a 16-bit length
+    case RES_STRING:
+    case RES_DATA:
+        return addShortData(target, "", 0);
+    // fixed-width numeric types
+    case RES_DOUBLE:
+        return addDouble(target, 0.0);
+    case RES_FLOAT:
+        return addFloat(target, 0.0f);
+    case RES_INT64:
+        return addInt64(target, 0L);
+    case RES_INT:
+        return addInt32(target, 0);
+    case RES_SHORT:
+        return addShort(target, 0);
+    case RES_BYTE:
+        return addByte(target, 0);
+    }
+    // only reached for a type value not covered by the switch
+    LOG_ASSERT(type <= RES_FEATUREDATA);
+    return 0;
+}
+
+} // namespace docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.h
new file mode 100644
index 00000000000..50bce13efd3
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchsummary/docsummary/resultclass.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Helpers for appending values in the binary docsum format to a RawBuf.
+ * Every function returns the number of bytes it appended.
+ **/
+class DocsumFormat
+{
+public:
+    // fixed-width values
+    static size_t addByte(search::RawBuf &target, uint8_t value);
+    static size_t addShort(search::RawBuf &target, uint16_t value);
+    static size_t addInt32(search::RawBuf &target, uint32_t value);
+    static size_t addFloat(search::RawBuf &target, float value);
+    static size_t addDouble(search::RawBuf &target, double value);
+    static size_t addInt64(search::RawBuf &target, uint64_t value);
+
+    // length-prefixed content (16-bit and 32-bit length respectively)
+    static size_t addShortData(search::RawBuf &target, const char *buf, uint32_t buflen);
+    static size_t addLongData(search::RawBuf &target, const char *buf, uint32_t buflen);
+
+    // empty representation of the given field type
+    static size_t addEmpty(ResType type, search::RawBuf &target);
+
+    /**
+     * Binds the static helpers above to one target buffer, so the buffer
+     * need not be repeated on every call.
+     **/
+    class Appender {
+    private:
+        search::RawBuf &_target;
+    public:
+        Appender(search::RawBuf &target) : _target(target) {}
+
+        size_t addByte(uint8_t value)     { return DocsumFormat::addByte(_target, value); }
+        size_t addShort(uint16_t value)   { return DocsumFormat::addShort(_target, value); }
+        size_t addInt32(uint32_t value)   { return DocsumFormat::addInt32(_target, value); }
+        size_t addFloat(float value)      { return DocsumFormat::addFloat(_target, value); }
+        size_t addDouble(double value)    { return DocsumFormat::addDouble(_target, value); }
+        size_t addInt64(uint64_t value)   { return DocsumFormat::addInt64(_target, value); }
+
+        size_t addShortData(const char *buf, uint32_t buflen) {
+            return DocsumFormat::addShortData(_target, buf, buflen);
+        }
+        size_t addLongData(const char *buf, uint32_t buflen) {
+            return DocsumFormat::addLongData(_target, buf, buflen);
+        }
+
+        size_t addEmpty(ResType type)     { return DocsumFormat::addEmpty(type, _target); }
+    };
+
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp
new file mode 100644
index 00000000000..f8139dcb2a9
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchsummary/docsummary/docsumstate.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Creates an empty state bound to the given callback. The field space
+ * buffer starts out backed by the inline store, and the dynamic teaser
+ * cache is marked as holding nothing.
+ **/
+GetDocsumsState::GetDocsumsState(GetDocsumsStateCallback &callback)
+    : _args(),
+      _docsumbuf(NULL),
+      _docsumcnt(0),
+      _kwExtractor(NULL),
+      _keywords(NULL),
+      _callback(callback),
+      _dynteaser(),
+      _docSumFieldSpace(_docSumFieldSpaceStore, sizeof(_docSumFieldSpaceStore)), // only alloc buffer if needed
+      _attrCtx(),
+      _attributes(),
+      _jsonStringer(),
+      _parsedLocation(),
+      _summaryFeatures(NULL),
+      _summaryFeaturesCached(false),
+      _rankFeatures(NULL)
+{
+    // No teaser has been computed yet: no juniper objects are held and
+    // every numeric part of the cache key is set to the all-ones value.
+    const uint32_t unset = static_cast<uint32_t>(-1);
+    _dynteaser._config = NULL;
+    _dynteaser._query  = NULL;
+    _dynteaser._result = NULL;
+    _dynteaser._docid  = unset;
+    _dynteaser._input  = unset;
+    _dynteaser._lang   = unset;
+}
+
+
+/**
+ * Releases the buffers owned by the state and any juniper result and
+ * query handle still held by the dynamic teaser cache.
+ **/
+GetDocsumsState::~GetDocsumsState()
+{
+    free(_docsumbuf);
+    free(_keywords);
+
+    juniper::Result *heldResult = _dynteaser._result;
+    if (heldResult != NULL) {
+        juniper::ReleaseResult(heldResult);
+    }
+    juniper::QueryHandle *heldQuery = _dynteaser._query;
+    if (heldQuery != NULL) {
+        juniper::ReleaseQueryHandle(heldQuery);
+    }
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h
new file mode 100644
index 00000000000..48ba84fb1e4
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/juniper/rpinterface.h>
+
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchsummary/docsummary/getdocsumargs.h>
+#include <vespa/searchsummary/docsummary/idocsumenvironment.h>
+#include <vespa/searchsummary/docsummary/keywordextractor.h>
+#include <vespa/searchlib/common/featureset.h>
+#include <vespa/searchlib/common/location.h>
+#include <vespa/vespalib/util/jsonwriter.h>
+
+
+namespace search {
+namespace docsummary {
+
+class GetDocsumsState;
+
+/**
+ * Callback interface held by GetDocsumsState, used to have summary
+ * features, rank features and the parsed location filled into the state.
+ **/
+class GetDocsumsStateCallback
+{
+public:
+    virtual void FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env) = 0;
+    virtual void FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env) = 0;
+    virtual void ParseLocation(GetDocsumsState * state) = 0;
+    virtual ~GetDocsumsStateCallback() { }
+};
+
+/**
+ * Per-thread memory shared between all docsum field generators.
+ *
+ * Holds the arguments of the getdocsums request together with buffers and
+ * cached values used by the individual field writers. The destructor
+ * releases _docsumbuf and _keywords with free() and releases the juniper
+ * query handle and result held in _dynteaser. The object cannot be copied.
+ **/
+class GetDocsumsState
+{
+private:
+ GetDocsumsState(const GetDocsumsState &); // declared private: copying is disabled
+ GetDocsumsState& operator=(const GetDocsumsState &); // declared private: assignment is disabled
+
+public:
+ const search::attribute::IAttributeVector * getAttribute(size_t index) const { return _attributes[index]; } // unchecked lookup in _attributes
+
+ GetDocsumArgs _args; // from getdocsums request
+
+ uint32_t *_docsumbuf; // from getdocsums request; released with free() by the destructor
+ uint32_t _docsumcnt; // from getdocsums request
+
+ KeywordExtractor *_kwExtractor;
+ char *_keywords; // list of keywords from query; released with free() by the destructor
+
+ GetDocsumsStateCallback &_callback;
+
+ struct DynTeaserState {
+ uint32_t _docid; // document id ('cache key')
+ uint32_t _input; // input field ('cache key')
+ uint32_t _lang; // lang field ('cache key')
+ juniper::Config *_config; // juniper config ('cache key')
+ juniper::QueryHandle *_query; // juniper query representation; released by the destructor
+ juniper::Result *_result; // juniper analyze result; released by the destructor
+ } _dynteaser;
+
+ search::RawBuf _docSumFieldSpace; // constructed on top of _docSumFieldSpaceStore; used when resolving fields
+ char _docSumFieldSpaceStore[2048]; // inline storage handed to _docSumFieldSpace by the constructor
+ search::attribute::IAttributeContext::UP _attrCtx;
+ std::vector<const search::attribute::IAttributeVector *> _attributes; // indexed by getAttribute()
+ vespalib::JSONStringer _jsonStringer;
+
+ // used by AbsDistanceDFW
+ std::unique_ptr<search::common::Location> _parsedLocation;
+
+ // used by SummaryFeaturesDFW
+ FeatureSet::SP _summaryFeatures;
+ bool _summaryFeaturesCached;
+
+ // used by RankFeaturesDFW
+ FeatureSet::SP _rankFeatures;
+
+ GetDocsumsState(GetDocsumsStateCallback &callback);
+ ~GetDocsumsState();
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h
new file mode 100644
index 00000000000..fcdb16e2e05
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <utility>
+
+#include "docsumstorevalue.h"
+
+namespace search {
+namespace docsummary {
+
+
+/**
+ * Interface implemented by objects that can hand out docsum blobs looked
+ * up by local document id.
+ **/
+class IDocsumStore
+{
+public:
+    /**
+     * Owning pointer to a docsum store.
+     */
+    using UP = std::unique_ptr<IDocsumStore>;
+
+    /**
+     * Virtual destructor; the interface itself has nothing to clean up.
+     */
+    virtual ~IDocsumStore() { }
+
+    /**
+     * @return total number of documents.
+     **/
+    virtual uint32_t getNumDocs() = 0;
+
+    /**
+     * Look up the docsum blob of a document. The returned value refers to
+     * memory owned by the docsum store (either mmap()ed or from a
+     * memory-based index of some kind).
+     *
+     * @param docid local document id
+     * @param useSlimeInsideFields use serialized slime instead of json for structured fields
+     * @return docsum blob location and size
+     **/
+    virtual DocsumStoreValue getMappedDocsum(uint32_t docid, bool useSlimeInsideFields) = 0;
+
+    /**
+     * @return the default input class used.
+     **/
+    virtual uint32_t getSummaryClassId() const = 0;
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstorevalue.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumstorevalue.h
new file mode 100644
index 00000000000..9116ecf1395
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstorevalue.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <utility>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Simple wrapper class containing the location and size of a docsum
+ * blob located in memory. The memory containing the docsum blob is
+ * owned by the object that emitted the docsum store value object.
+ * A blob always starts with an uint32_t representing the result class ID;
+ * the serialized fields follow directly after it.
+ **/
+class DocsumStoreValue
+{
+private:
+    std::pair<const char *, uint32_t> _value;
+
+public:
+    /**
+     * Construct object representing an empty docsum blob.
+     **/
+    DocsumStoreValue() : _value(static_cast<const char*>(0), 0) {}
+
+    /**
+     * Construct object encapsulating the given location and size.
+     *
+     * @param pt_ docsum location
+     * @param len_ docsum size
+     **/
+    DocsumStoreValue(const char *pt_, uint32_t len_) : _value(pt_, len_) {}
+
+    /**
+     * @return docsum blob location
+     **/
+    const char *pt() const { return _value.first; }
+
+    /**
+     * @return docsum blob size
+     **/
+    uint32_t len() const { return _value.second; }
+
+    /**
+     * @return pointer to start of serialized docsum fields, or a null
+     *         pointer if this does not hold a valid blob (avoids pointer
+     *         arithmetic on a null or too short blob)
+     **/
+    const char *fieldsPt() const {
+        return valid() ? (_value.first + sizeof(uint32_t)) : static_cast<const char*>(0);
+    }
+
+    /**
+     * @return size of serialized docsum fields, or 0 if this does not hold
+     *         a valid blob (a blob shorter than the class ID would
+     *         otherwise make the unsigned subtraction wrap around)
+     **/
+    uint32_t fieldsSz() const {
+        return valid() ? static_cast<uint32_t>(_value.second - sizeof(uint32_t)) : 0u;
+    }
+
+    /**
+     * @return true if this has a valid blob, i.e. a non-null location
+     *         with room for at least the result class ID
+     **/
+    bool valid() const { return (_value.first != 0) && (_value.second >= sizeof(uint32_t)); }
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
new file mode 100644
index 00000000000..8f0df1915db
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
@@ -0,0 +1,517 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/fastlib/text/normwordfolder.h>
+#include <vespa/searchsummary/docsummary/docsumwriter.h>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/searchsummary/docsummary/docsumstore.h>
+#include <vespa/searchsummary/docsummary/keywordextractor.h>
+#include <vespa/searchsummary/docsummary/docsumformat.h>
+#include <vespa/searchlib/common/transport.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
+
+using namespace vespalib::slime::convenience;
+
+namespace search {
+namespace docsummary {
+
+LOG_SETUP(".searchlib.docsummary.docsumwriter");
+
+
+/**
+ * Appends the slime magic id followed by the binary encoding of the
+ * given slime object to the buffer.
+ *
+ * @return number of bytes the used part of the buffer grew by
+ **/
+uint32_t
+IDocsumWriter::slime2RawBuf(const Slime & slime, RawBuf & buf)
+{
+    const uint32_t usedBefore = buf.GetUsedLen();
+
+    const uint32_t magicId = ::search::fs4transport::SLIME_MAGIC_ID;
+    buf.append(&magicId, sizeof(magicId));
+
+    SlimeOutputRawBufAdapter rawBufOutput(buf);
+    vespalib::slime::BinaryFormat::encode(slime, rawBufOutput);
+
+    const uint32_t usedAfter = buf.GetUsedLen();
+    return (usedAfter - usedBefore);
+}
+
+/**
+ * Appends the raw class id to the target buffer.
+ *
+ * @return number of bytes appended (size of the class id)
+ **/
+uint32_t
+DynamicDocsumWriter::WriteClassID(uint32_t classID, search::RawBuf *target)
+{
+    target->append(&classID, sizeof(classID));
+    const uint32_t numBytes = sizeof(classID);
+    return numBytes;
+}
+
+
+/**
+ * Writes a docsum blob where every field comes from an override
+ * writer: first the class id, then each field of the output class in
+ * entry order. No stored docsum is passed to the writers (NULL).
+ *
+ * @return total number of bytes reported as written
+ **/
+uint32_t
+DynamicDocsumWriter::GenerateDocsum(uint32_t docid,
+                                    GetDocsumsState *state,
+                                    const ResultClass *outputClass,
+                                    search::RawBuf *target)
+{
+    uint32_t total = WriteClassID(outputClass->GetClassID(), target);
+
+    uint32_t fieldIdx = 0;
+    while (fieldIdx < outputClass->GetNumEntries()) {
+        const ResConfigEntry *fieldCfg = outputClass->GetEntry(fieldIdx);
+        IDocsumFieldWriter *fieldWriter = _overrideTable[fieldCfg->_enumValue];
+        // every field must have an override writer on this path
+        LOG_ASSERT(fieldWriter != NULL);
+
+        total += fieldWriter->WriteField(docid, NULL, state, fieldCfg->_type, target);
+        fieldIdx++;
+    }
+
+    return total;
+}
+
+
+uint32_t // Re-serializes an unpacked docsum using the layout of outputClass; returns the number of bytes counted as written.
+DynamicDocsumWriter::RepackDocsum(GeneralResult *gres, // unpacked input docsum (source of copied fields)
+ GetDocsumsState *state, // forwarded to override writers
+ const ResultClass *outputClass, // class describing the fields to emit
+ search::RawBuf *target) // buffer the new blob is appended to
+{
+ uint32_t written = 0;
+
+ written += WriteClassID(outputClass->GetClassID(), target); // blob starts with the output class id
+
+ DocsumFormat::Appender appender(*target);
+
+ for (uint32_t i = 0; i < outputClass->GetNumEntries(); i++) {
+
+ const ResConfigEntry *outCfg = outputClass->GetEntry(i);
+ IDocsumFieldWriter *writer = _overrideTable[outCfg->_enumValue]; // NULL when the field is not overridden
+
+ if (writer != NULL) {
+
+ written += writer->WriteField(gres->GetDocID(), gres, state,
+ outCfg->_type, target);
+
+ } else {
+
+ int inIdx = gres->GetClass()->GetIndexFromEnumValue(outCfg->_enumValue); // position of the same field in the input class
+ const ResConfigEntry *inCfg = gres->GetClass()->GetEntry(inIdx); // NOTE(review): presumably NULL when the input class lacks the field - confirm GetEntry() handles a negative index
+
+ if (inCfg != NULL && inCfg->_type == outCfg->_type) {
+
+ // copy field: input has the field with an identical type
+
+ const ResEntry *entry = gres->GetEntry(inIdx);
+ LOG_ASSERT(entry != NULL);
+
+ switch (outCfg->_type) { // no default case: other types write nothing
+
+ case RES_INT: {
+ written += appender.addInt32(entry->_intval);
+ break; }
+
+ case RES_SHORT: {
+ written += appender.addShort(entry->_intval);
+ break; }
+
+ case RES_BYTE: {
+ written += appender.addByte(entry->_intval);
+ break; }
+
+ case RES_FLOAT: {
+ written += appender.addFloat(entry->_doubleval);
+ break; }
+
+ case RES_DOUBLE: {
+ written += appender.addDouble(entry->_doubleval);
+ break; }
+
+ case RES_INT64: {
+ written += appender.addInt64(entry->_int64val);
+ break; }
+
+ case RES_STRING: {
+ uint32_t slen = entry->_stringlen;
+ const char *sval = entry->_stringval;
+ written += appender.addShortData(sval, slen);
+ break; }
+
+ case RES_DATA: {
+ uint32_t dlen = entry->_datalen;
+ const char *dval = entry->_dataval;
+ written += appender.addShortData(dval, dlen);
+ break; }
+
+ case RES_XMLSTRING:
+ case RES_JSONSTRING:
+ case RES_FEATUREDATA:
+ case RES_LONG_STRING: {
+ uint32_t flen = entry->_len; // raw length word, written as-is
+ uint32_t slen = entry->_get_length(); // number of payload bytes to copy
+
+ // preserve compression flag
+ target->append(&flen, sizeof(flen));
+ written += sizeof(flen);
+ target->append(entry->_stringval, slen);
+ written += slen;
+ break; }
+
+ case RES_LONG_DATA: {
+ uint32_t flen = entry->_len; // raw length word, written as-is
+ uint32_t dlen = entry->_get_length(); // number of payload bytes to copy
+
+ // preserve compression flag
+ target->append(&flen, sizeof(flen));
+ written += sizeof(flen);
+ target->append(entry->_dataval, dlen);
+ written += dlen;
+ break; }
+ }
+
+ } else {
+ // insert empty field: input lacks the field or its type differs
+ written += appender.addEmpty(outCfg->_type);
+ }
+ }
+ } // END for loop
+
+ return written;
+}
+
+
+/**
+ * Resolves the output class from its name and, unless the docsum must
+ * be skipped or is fully generated, also resolves the input class.
+ **/
+DynamicDocsumWriter::ResolveClassInfo
+DynamicDocsumWriter::resolveClassInfo(vespalib::stringref outputClassName, uint32_t inputClassId) const
+{
+    DynamicDocsumWriter::ResolveClassInfo info = resolveOutputClass(outputClassName);
+    const bool inputNotNeeded = (info.mustSkip || info.allGenerated);
+    if (!inputNotNeeded) {
+        resolveInputClass(info, inputClassId);
+    }
+    return info;
+}
+
+/**
+ * Looks up the output class by name, falling back to the default
+ * output class id. Sets mustSkip when the resulting id is not a known
+ * class, and allGenerated when every field of the class is generated.
+ **/
+DynamicDocsumWriter::ResolveClassInfo
+DynamicDocsumWriter::resolveOutputClass(vespalib::stringref summaryClass) const
+{
+    DynamicDocsumWriter::ResolveClassInfo info;
+
+    uint32_t classId = _defaultOutputClass;
+    classId = _resultConfig->LookupResultClassId(summaryClass, classId);
+    info.outputClassId = classId;
+
+    if (classId == ResultConfig::NoClassID()) {
+        // no output class selected; left for the input class to decide
+        return info;
+    }
+
+    const ResultClass *outClass = _resultConfig->LookupResultClass(classId);
+    if (outClass == NULL) {
+        LOG(warning, "Illegal docsum class requested: %d, using empty docsum for documents", classId);
+        info.mustSkip = true;
+        return info;
+    }
+
+    info.outputClass = outClass;
+    const ResultClass::DynamicInfo *dynInfo = outClass->getDynamicInfo();
+    if (dynInfo->_generateCnt == outClass->GetNumEntries()) {
+        LOG_ASSERT(dynInfo->_overrideCnt == dynInfo->_generateCnt);
+        info.allGenerated = true;
+    }
+    info.outputClassInfo = dynInfo;
+    return info;
+}
+
+/**
+ * Resolves the input class from its id. An unknown input class means
+ * the docsum must be skipped. When no output class was selected the
+ * input class doubles as output class. Repacking is turned off when
+ * input and output class are the same and nothing is overridden.
+ **/
+void
+DynamicDocsumWriter::resolveInputClass(ResolveClassInfo &rci, uint32_t id) const
+{
+    const ResultClass *inClass = _resultConfig->LookupResultClass(id);
+    rci.inputClass = inClass;
+    if (inClass == NULL) {
+        rci.mustSkip = true;
+        return;
+    }
+
+    if (rci.outputClass == NULL) {
+        LOG_ASSERT(rci.outputClassId == ResultConfig::NoClassID());
+        rci.outputClassId = id;
+        rci.outputClass = inClass;
+        rci.outputClassInfo = inClass->getDynamicInfo();
+    }
+
+    const bool sameClass = (rci.inputClass == rci.outputClass);
+    if (sameClass && (rci.outputClassInfo->_overrideCnt == 0)) {
+        rci.mustRepack = false;
+    }
+}
+
+/**
+ * Resolves the input class from the class id obtained from the given
+ * docsum blob.
+ **/
+void
+DynamicDocsumWriter::resolveInputClass(ResolveClassInfo &rci, DocsumStoreValue blob) const
+{
+    const uint32_t blobClassId = _resultConfig->GetClassID(blob.pt(), blob.len());
+    resolveInputClass(rci, blobClassId);
+}
+
+
+/**
+ * Writes a docsum blob in the non-slime format. Depending on the
+ * resolved classes the blob is generated from override writers,
+ * repacked from the stored docsum, or passed through unchanged.
+ *
+ * @return number of bytes written, 0 when the docsum is skipped or
+ *         could not be unpacked
+ **/
+uint32_t
+DynamicDocsumWriter::oldWriteDocsum(uint32_t docid,
+                                    GetDocsumsState *state,
+                                    IDocsumStore *docinfos,
+                                    search::RawBuf *target)
+{
+    ResolveClassInfo rci = resolveOutputClass(state->_args.getResultClassName());
+    if (rci.mustSkip) {
+        return 0;
+    }
+    if (rci.allGenerated) {
+        // generate docsum entry on-the-fly
+        return GenerateDocsum(docid, state, rci.outputClass, target);
+    }
+
+    // look up docsum entry
+    DocsumStoreValue blob = docinfos->getMappedDocsum(docid, false);
+    resolveInputClass(rci, blob);
+    if (rci.mustSkip) {
+        return 0;
+    }
+
+    if (!rci.mustRepack) {
+        // pass-through docsum blob
+        target->append(blob.pt(), blob.len());
+        return blob.len();
+    }
+
+    // re-pack docsum blob
+    GeneralResult gres(rci.inputClass, 0, docid, 0);
+    if (gres.inplaceUnpack(blob)) {
+        return RepackDocsum(&gres, state, rci.outputClass, target);
+    }
+    // unpack failed
+    LOG(error, "Unpack failed: illegal docsum entry for document %d", docid);
+    return 0;
+}
+
+
+/**
+ * Inserts the value of a single unpacked docsum field into slime,
+ * choosing the slime type from the configured field type:
+ * integer types become long, floating point types become double,
+ * string-like types become string, data types become data, and
+ * JSONSTRING content is decoded as binary slime into the inserter.
+ **/
+static void convertEntry(GetDocsumsState *state,
+                         const ResConfigEntry *resCfg,
+                         const ResEntry *entry,
+                         Inserter &inserter,
+                         Slime &slime)
+{
+    const char *fieldPtr;
+    uint32_t fieldLen;
+
+    LOG_ASSERT(resCfg != 0 && entry != 0);
+    switch (resCfg->_type) {
+    case RES_INT:
+    case RES_SHORT:
+    case RES_BYTE: {
+        inserter.insertLong(entry->_intval);
+        break;
+    }
+    case RES_INT64: {
+        inserter.insertLong(entry->_int64val);
+        break;
+    }
+    case RES_FLOAT:
+    case RES_DOUBLE: {
+        inserter.insertDouble(entry->_doubleval);
+        break;
+    }
+    case RES_STRING:
+    case RES_LONG_STRING:
+    case RES_FEATUREDATA:
+    case RES_XMLSTRING: {
+        entry->_resolve_field(&fieldPtr, &fieldLen, &state->_docSumFieldSpace);
+        inserter.insertString(Memory(fieldPtr, fieldLen));
+        break;
+    }
+    case RES_DATA:
+    case RES_LONG_DATA: {
+        entry->_resolve_field(&fieldPtr, &fieldLen, &state->_docSumFieldSpace);
+        inserter.insertData(Memory(fieldPtr, fieldLen));
+        break;
+    }
+    case RES_JSONSTRING: {
+        entry->_resolve_field(&fieldPtr, &fieldLen, &state->_docSumFieldSpace);
+        if (fieldLen == 0) {
+            break; // empty field: nothing is inserted
+        }
+        // note: 'JSONSTRING' really means 'structured data',
+        // and in this code path we depend on calling the
+        // getMappedDocsum api with flag useSlimeInsideFields=true
+        size_t decoded = vespalib::slime::BinaryFormat::decode_into(Memory(fieldPtr, fieldLen), slime, inserter);
+        if (decoded != fieldLen) {
+            LOG(warning, "could not decode %u bytes: %zu bytes decoded", fieldLen, decoded);
+        }
+        break;
+    }
+    }
+}
+
+
+/**
+ * Inserts the docsum for one document into slime.
+ *
+ * When all fields are generated, each override writer inserts its
+ * field directly (fields at their default value are left out).
+ * Otherwise the stored docsum is looked up and unpacked, and each
+ * output field is either produced by its override writer or copied
+ * from the unpacked input when the input class has the field with the
+ * same type.
+ *
+ * A nix value is inserted when the class info says the docsum must be
+ * skipped, when the needed classes are unresolved, or when the stored
+ * docsum cannot be unpacked.
+ *
+ * @param rci         resolved class info, see resolveClassInfo()
+ * @param docid       local document id
+ * @param state       per-request docsum state
+ * @param docinfos    store used to look up the stored docsum
+ * @param slime       slime object owning the inserted data
+ * @param topInserter where the docsum object (or nix) is inserted
+ **/
+void
+DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci,
+                                  uint32_t docid,
+                                  GetDocsumsState *state,
+                                  IDocsumStore *docinfos,
+                                  vespalib::Slime & slime,
+                                  vespalib::slime::Inserter & topInserter)
+{
+    // Callers hand over the class info unchecked; never dereference
+    // unresolved classes.
+    if (rci.mustSkip || rci.outputClass == NULL ||
+        (!rci.allGenerated && rci.inputClass == NULL))
+    {
+        topInserter.insertNix();
+        return;
+    }
+    if (rci.allGenerated) {
+        // generate docsum entry on-the-fly
+        vespalib::slime::Cursor & docsum = topInserter.insertObject();
+        for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) {
+            const ResConfigEntry *resCfg = rci.outputClass->GetEntry(i);
+            IDocsumFieldWriter *writer = _overrideTable[resCfg->_enumValue];
+            // same invariant as GenerateDocsum(): all fields have writers
+            LOG_ASSERT(writer != NULL);
+            if (! writer->isDefaultValue(docid, state)) {
+                const Memory field_name(resCfg->_bindname.data(),
+                                        resCfg->_bindname.size());
+                ObjectInserter inserter(docsum, field_name);
+                writer->insertField(docid, NULL, state, resCfg->_type, inserter);
+            }
+        }
+    } else {
+        // look up docsum entry
+        DocsumStoreValue value = docinfos->getMappedDocsum(docid, true);
+        // re-pack docsum blob
+        GeneralResult gres(rci.inputClass, 0, docid, 0);
+        if (! gres.inplaceUnpack(value)) {
+            LOG(error, "Unpack failed: illegal docsum entry for document %d", docid);
+            topInserter.insertNix();
+            return;
+        }
+        vespalib::slime::Cursor & docsum = topInserter.insertObject();
+        for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) {
+            const ResConfigEntry *outCfg = rci.outputClass->GetEntry(i);
+            IDocsumFieldWriter *writer = _overrideTable[outCfg->_enumValue];
+            const Memory field_name(outCfg->_bindname.data(), outCfg->_bindname.size());
+            ObjectInserter inserter(docsum, field_name);
+            if (writer != NULL) {
+                writer->insertField(docid, &gres, state, outCfg->_type, inserter);
+            } else {
+                if (rci.inputClass == rci.outputClass) {
+                    // same class: field i of the input is field i of the output
+                    convertEntry(state, outCfg, gres.GetEntry(i), inserter, slime);
+                } else {
+                    int inIdx = rci.inputClass->GetIndexFromEnumValue(outCfg->_enumValue);
+                    const ResConfigEntry *inCfg = rci.inputClass->GetEntry(inIdx);
+                    if (inCfg != NULL && inCfg->_type == outCfg->_type) {
+                        // copy field
+                        const ResEntry *entry = gres.GetEntry(inIdx);
+                        LOG_ASSERT(entry != NULL);
+                        convertEntry(state, outCfg, entry, inserter, slime);
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+/**
+ * Creates a writer for the given result config. Allocates one dynamic
+ * info record per result class (registered on the class with counters
+ * zeroed) and an override table with one initially empty slot per
+ * field name enum value.
+ *
+ * @param config    result config; must not be NULL
+ * @param extractor keyword extractor handed to docsum states
+ **/
+DynamicDocsumWriter::DynamicDocsumWriter( ResultConfig *config, KeywordExtractor *extractor)
+    : _resultConfig(config),
+      _keywordExtractor(extractor),
+      _defaultOutputClass(ResultConfig::NoClassID()),
+      _numClasses(0),
+      _numEnumValues(0),
+      _classInfoTable(NULL),
+      _overrideTable(NULL)
+{
+    // Check before the first dereference; the sizes are therefore read
+    // here rather than in the initializer list.
+    LOG_ASSERT(config != NULL);
+    _numClasses = config->GetNumResultClasses();
+    _numEnumValues = config->GetFieldNameEnum().GetNumEntries();
+
+    _classInfoTable = new ResultClass::DynamicInfo[_numClasses];
+    _overrideTable  = new IDocsumFieldWriter*[_numEnumValues];
+
+    uint32_t i = 0;
+    for (ResultConfig::iterator it(config->begin()), mt(config->end()); it != mt; it++, i++) {
+        _classInfoTable[i]._overrideCnt = 0;
+        _classInfoTable[i]._generateCnt = 0;
+        it->setDynamicInfo(&(_classInfoTable[i]));
+    }
+    LOG_ASSERT(i == _numClasses);
+
+    // no field is overridden until Override() is called
+    for (i = 0; i < _numEnumValues; i++)
+        _overrideTable[i] = NULL;
+}
+
+
+/**
+ * Deletes the result config, the keyword extractor, the class info
+ * table, every registered override writer and the override table.
+ **/
+DynamicDocsumWriter::~DynamicDocsumWriter()
+{
+    delete _resultConfig;
+    delete _keywordExtractor;
+    delete [] _classInfoTable;
+
+    uint32_t slot = 0;
+    while (slot < _numEnumValues) {
+        delete _overrideTable[slot];
+        ++slot;
+    }
+    delete [] _overrideTable;
+}
+
+/**
+ * Sets the default output class. This only succeeds when the class is
+ * defined in the result config and no default has been set before.
+ *
+ * @return true if the default output class was set
+ **/
+bool
+DynamicDocsumWriter::SetDefaultOutputClass(uint32_t classID)
+{
+    if (_resultConfig->LookupResultClass(classID) == NULL) {
+        LOG(warning, "cannot set default output docsum class to %d; class not defined", classID);
+        return false;
+    }
+    if (_defaultOutputClass != ResultConfig::NoClassID()) {
+        LOG(warning, "cannot set default output docsum class to %d; value already set", classID);
+        return false;
+    }
+    _defaultOutputClass = classID;
+    return true;
+}
+
+
+/**
+ * Registers an override writer for the named field. The writer is
+ * deleted and false is returned when the field name is unknown or the
+ * field already has an override. On success the writer is stored in
+ * the override table and the override/generate counters of every
+ * result class containing the field are updated.
+ *
+ * @return true if the writer was registered
+ **/
+bool
+DynamicDocsumWriter::Override(const char *fieldName, IDocsumFieldWriter *writer)
+{
+    uint32_t enumValue = _resultConfig->GetFieldNameEnum().Lookup(fieldName);
+
+    if (enumValue >= _numEnumValues) {
+        LOG(warning, "cannot override docsum field '%s'; undefined field name", fieldName);
+        delete writer;
+        return false;
+    }
+    if (_overrideTable[enumValue] != NULL) {
+        LOG(warning, "cannot override docsum field '%s'; already overridden", fieldName);
+        delete writer;
+        return false;
+    }
+
+    writer->setIndex(enumValue);
+    _overrideTable[enumValue] = writer;
+
+    for (ResultConfig::iterator it(_resultConfig->begin()), mt(_resultConfig->end()); it != mt; it++) {
+        if (it->GetIndexFromEnumValue(enumValue) < 0) {
+            continue; // this class does not contain the field
+        }
+        ResultClass::DynamicInfo *dynInfo = it->getDynamicInfo();
+        dynInfo->_overrideCnt++;
+        if (writer->IsGenerated()) {
+            dynInfo->_generateCnt++;
+        }
+    }
+
+    return true;
+}
+
+
+/**
+ * Prepares a docsum state: stores the keyword extractor, creates an
+ * attribute context and, for each override writer reporting a
+ * non-empty attribute name, looks up that attribute in the context.
+ **/
+void
+DynamicDocsumWriter::InitState(IAttributeManager & attrMan, GetDocsumsState *state)
+{
+    state->_kwExtractor = _keywordExtractor;
+    state->_attrCtx = attrMan.createContext();
+    state->_attributes.resize(_numEnumValues);
+
+    for (size_t slot = 0; slot < state->_attributes.size(); ++slot) {
+        const IDocsumFieldWriter *fieldWriter = _overrideTable[slot];
+        if (!fieldWriter) {
+            continue;
+        }
+        const vespalib::string & attrName = fieldWriter->getAttributeName();
+        if (attrName.empty()) {
+            continue;
+        }
+        state->_attributes[slot] = state->_attrCtx->getAttribute(attrName);
+    }
+}
+
+
+/**
+ * Writes the docsum for a document to the target buffer. When the
+ * request flags allow slime, the docsum is built as slime and encoded
+ * with slime2RawBuf(); otherwise the old blob format is written.
+ *
+ * @return number of bytes written to the target buffer
+ **/
+uint32_t
+DynamicDocsumWriter::WriteDocsum(uint32_t docid,
+                                 GetDocsumsState *state,
+                                 IDocsumStore *docinfos,
+                                 search::RawBuf *target)
+{
+    const bool slimeAllowed =
+        ((state->_args.getFlags() & ::search::fs4transport::GDFLAG_ALLOW_SLIME) != 0);
+    if (!slimeAllowed) {
+        return oldWriteDocsum(docid, state, docinfos, target);
+    }
+
+    vespalib::Slime slime;
+    vespalib::slime::SlimeInserter inserter(slime);
+    insertDocsum(resolveClassInfo(state->_args.getResultClassName(), docinfos->getSummaryClassId()),
+                 docid, state, docinfos, slime, inserter);
+    return slime2RawBuf(slime, *target);
+}
+
+
+} // namespace search::docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h
new file mode 100644
index 00000000000..0dd7204ba16
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+#include <vespa/searchsummary/docsummary/docsumstate.h>
+#include <vespa/searchsummary/docsummary/docsumstore.h>
+#include <vespa/searchsummary/docsummary/keywordextractor.h>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/fastlib/text/unicodeutil.h>
+#include <vespa/fastlib/text/wordfolder.h>
+#include "juniperproperties.h"
+
+using search::IAttributeManager;
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Interface for components that produce docsums, either as raw blobs
+ * appended to a buffer or as slime structures.
+ **/
+class IDocsumWriter
+{
+public:
+    /**
+     * Outcome of resolving which input and output result classes to
+     * use for a docsum request.
+     **/
+    struct ResolveClassInfo {
+        bool mustSkip;       // no docsum can be produced
+        bool allGenerated;   // all output fields come from generating writers
+        bool mustRepack;     // stored docsum cannot be passed through as-is
+        uint32_t outputClassId;
+        const ResultClass *outputClass;
+        const ResultClass::DynamicInfo *outputClassInfo;
+        const ResultClass *inputClass;
+
+        ResolveClassInfo()
+            : mustSkip(false),
+              allGenerated(false),
+              mustRepack(true),
+              outputClassId(ResultConfig::NoClassID()),
+              outputClass(NULL),
+              outputClassInfo(NULL),
+              inputClass(NULL)
+        {}
+    };
+
+    virtual ~IDocsumWriter() {}
+
+    /** Prepare the given state for use with this writer. */
+    virtual void InitState(IAttributeManager & attrMan, GetDocsumsState *state) = 0;
+
+    /** Write the docsum for a document to the target buffer; returns bytes written. */
+    virtual uint32_t WriteDocsum(uint32_t docid, GetDocsumsState *state,
+                                 IDocsumStore *docinfos, search::RawBuf *target) = 0;
+
+    /** Insert the docsum for a document into slime through the target inserter. */
+    virtual void insertDocsum(const ResolveClassInfo & rci, uint32_t docid,
+                              GetDocsumsState *state, IDocsumStore *docinfos,
+                              vespalib::Slime & slime,
+                              vespalib::slime::Inserter & target) = 0;
+
+    /** Resolve class info from the output class name and the input class id. */
+    virtual ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName,
+                                              uint32_t inputClassId) const = 0;
+
+    /** Append the slime magic id and the binary-encoded slime to buf; returns bytes added. */
+    static uint32_t slime2RawBuf(const vespalib::Slime & slime, RawBuf & buf);
+};
+
+//--------------------------------------------------------------------------
+
+/**
+ * Docsum writer driven by a result config. Individual fields can be
+ * overridden with field writers; fields without an override are taken
+ * from the stored docsum.
+ **/
+class DynamicDocsumWriter : public IDocsumWriter
+{
+private:
+    // copying is disabled
+    DynamicDocsumWriter(const DynamicDocsumWriter &);
+    DynamicDocsumWriter& operator=(const DynamicDocsumWriter &);
+
+private:
+    ResultConfig *_resultConfig;               // deleted by the destructor
+    KeywordExtractor *_keywordExtractor;       // deleted by the destructor
+    uint32_t _defaultOutputClass;
+    uint32_t _numClasses;
+    uint32_t _numEnumValues;
+    ResultClass::DynamicInfo *_classInfoTable; // one record per result class
+    IDocsumFieldWriter **_overrideTable;       // indexed by field name enum value
+
+    uint32_t WriteClassID(uint32_t classID, search::RawBuf *target);
+
+    uint32_t GenerateDocsum(uint32_t docid, GetDocsumsState *state,
+                            const ResultClass *outputClass, search::RawBuf *target);
+
+    uint32_t RepackDocsum(GeneralResult *gres, GetDocsumsState *state,
+                          const ResultClass *outputClass, search::RawBuf *target);
+
+    void resolveInputClass(ResolveClassInfo &rci, uint32_t id) const;
+    void resolveInputClass(ResolveClassInfo &rci, DocsumStoreValue blob) const;
+    ResolveClassInfo resolveOutputClass(vespalib::stringref outputClassName) const;
+
+    uint32_t oldWriteDocsum(uint32_t docid, GetDocsumsState *state,
+                            IDocsumStore *docinfos, search::RawBuf *target);
+
+public:
+    DynamicDocsumWriter(ResultConfig *config, KeywordExtractor *extractor);
+    virtual ~DynamicDocsumWriter();
+
+    ResultConfig *GetResultConfig() { return _resultConfig; }
+
+    bool SetDefaultOutputClass(uint32_t classID);
+    bool Override(const char *fieldName, IDocsumFieldWriter *writer);
+
+    void InitState(IAttributeManager & attrMan, GetDocsumsState *state) override;
+
+    uint32_t WriteDocsum(uint32_t docid, GetDocsumsState *state,
+                         IDocsumStore *docinfos, search::RawBuf *target) override;
+
+    void insertDocsum(const ResolveClassInfo & outputClassInfo, uint32_t docid,
+                      GetDocsumsState *state, IDocsumStore *docinfos,
+                      vespalib::Slime & slime,
+                      vespalib::slime::Inserter & target) override;
+
+    ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName,
+                                      uint32_t inputClassId) const override;
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp
new file mode 100644
index 00000000000..8c2c0b2e65c
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp
@@ -0,0 +1,494 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <cstdio>
+#include <vespa/log/log.h>
+#include "juniperdfw.h"
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchlib/queryeval/split_float.h>
+
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchsummary/docsummary/docsumwriter.h>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/searchsummary/docsummary/docsumstate.h>
+#include <vespa/searchsummary/docsummary/keywordextractor.h>
+#include <vespa/searchsummary/docsummary/docsumformat.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/juniper/config.h>
+
+LOG_SETUP(".searchlib.docsummary.dynamicteaserdfw");
+
+namespace juniper
+{
+
+
+/**
+ * Index name, term and weight for a query item that is described
+ * explicitly rather than through a stack dump iterator. The strings
+ * are referenced, not copied.
+ **/
+struct ExplicitItemData
+{
+    const char *_index;
+    uint32_t _indexlen;
+    const char *_term;
+    uint32_t _termlen;
+    uint32_t _weight;
+
+    ExplicitItemData()
+        : _index(NULL),
+          _indexlen(0),
+          _term(NULL),
+          _termlen(0),
+          _weight(0)
+    {}
+
+    ExplicitItemData(const char *index, uint32_t indexlen,
+                     const char* term, uint32_t termlen,
+                     uint32_t weight = 0)
+        : _index(index),
+          _indexlen(indexlen),
+          _term(term),
+          _termlen(termlen),
+          _weight(weight)
+    {}
+};
+
+
+
+/**
+ * This struct is used to point to the traversal state located on
+ * the stack of the IQuery Traverse method. This is needed because
+ * the Traverse method is const. An item refers either to a stack
+ * dump iterator or to explicit item data; the other pointer is NULL.
+ **/
+struct QueryItem
+{
+    search::SimpleQueryStackDumpIterator *_si;
+    const ExplicitItemData *_data;
+
+    QueryItem()
+        : _si(NULL), _data(NULL)
+    {}
+    QueryItem(search::SimpleQueryStackDumpIterator *si)
+        : _si(si), _data(NULL)
+    {}
+    QueryItem(ExplicitItemData *data)
+        : _si(NULL), _data(data)
+    {}
+
+private:
+    // copying is disabled
+    QueryItem(const QueryItem&);
+    QueryItem& operator= (const QueryItem&);
+};
+};
+
+namespace search {
+class Property;
+
+namespace fef {
+/**
+ * Properties visitor that forwards the terms found in each visited
+ * property to a juniper query visitor.
+ **/
+class TermVisitor : public IPropertiesVisitor
+{
+public:
+    juniper::IQueryVisitor *_visitor;
+    juniper::QueryItem _item;
+
+    TermVisitor(juniper::IQueryVisitor *visitor)
+        : _visitor(visitor),
+          _item()
+    {}
+
+    virtual void visitProperty(const Property::Value &key, const Property &values);
+};
+
+void // Replays one property to the juniper visitor: the key is used as index name, the values encode blocks of terms and phrases.
+TermVisitor::visitProperty(const Property::Value &key, const Property &values)
+{
+ juniper::ExplicitItemData data; // backing data for 'item'; lives on this stack frame only
+ juniper::QueryItem item(&data);
+ int index = 0; // read position in 'values'; advanced by every getAt(index++)
+ int numBlocks = atoi(values.getAt(index++).c_str()); // first value: number of blocks that follow
+ data._index = key.c_str();
+ data._indexlen = key.length();
+
+ _visitor->VisitAND(&item, numBlocks); // the blocks of one property are combined with AND
+
+ for (int i = 0; i < numBlocks; i++) {
+ const Property::Value * s = & values.getAt(index++);
+ if ((*s)[0] == '"') { // a value starting with '"' opens a phrase block
+ s = & values.getAt(index++);
+ int phraseLen = atoi(s->c_str()); // next value: phrase length handed to VisitPHRASE
+ _visitor->VisitPHRASE(&item, phraseLen);
+ s = & values.getAt(index++);
+ while ((*s)[0] != '"') { // terms until the closing '"' value. NOTE(review): no bound on 'index' here - confirm what getAt() returns past the end for malformed input
+ data._term = s->c_str();
+ data._termlen = s->length();
+ _visitor->VisitKeyword(&item, s->c_str(), s->length());
+ s = & values.getAt(index++);
+ }
+ } else { // plain block: the value itself is a single keyword
+ data._term = s->c_str();
+ data._termlen = s->length();
+ _visitor->VisitKeyword(&item, s->c_str(), s->length());
+ }
+ }
+}
+
+}
+
+namespace docsummary {
+
+/**
+ * Adapter exposing a query stack dump, plus optional highlight terms,
+ * through the juniper::IQuery interface. Query items are backed either
+ * by the stack dump iterator (_si) or by explicit item data (_data).
+ **/
+class JuniperQueryAdapter : public juniper::IQuery
+{
+private:
+    // copying is disabled
+    JuniperQueryAdapter(const JuniperQueryAdapter&);
+    JuniperQueryAdapter& operator= (const JuniperQueryAdapter&);
+
+    KeywordExtractor *_kwExtractor;
+    const vespalib::stringref _buf;
+    const search::fef::Properties *_highlightTerms;
+    juniper::IQueryVisitor *_visitor;
+
+public:
+    /**
+     * @param kwExtractor    used to decide which indexes are useful; may be NULL
+     * @param buf            query stack dump
+     * @param highlightTerms extra terms to highlight; may be NULL
+     **/
+    JuniperQueryAdapter(KeywordExtractor *kwExtractor,
+                        const vespalib::stringref &buf,
+                        const search::fef::Properties *highlightTerms = NULL)
+        : _kwExtractor(kwExtractor), _buf(buf), _highlightTerms(highlightTerms), _visitor(NULL) {}
+
+    /**
+     * Skips the sub-tree rooted at the current item of the iterator.
+     *
+     * @return false if the stack dump ended before the sub-tree did
+     **/
+    // TODO: put this functionality into the stack dump iterator
+    bool SkipItem(search::SimpleQueryStackDumpIterator *iterator) const
+    {
+        uint32_t skipCount = iterator->getArity();
+
+        while (skipCount > 0) {
+            if (!iterator->next())
+                return false; // stack too small
+            skipCount = skipCount - 1 + iterator->getArity();
+        }
+        return true;
+    }
+
+    virtual bool Traverse(juniper::IQueryVisitor *v) const;
+
+    /** @return weight of the item, from the iterator or the explicit data */
+    virtual int Weight(const juniper::QueryItem* item) const
+    {
+        if (item->_si != NULL) {
+            return item->_si->GetWeight().percent();
+        } else {
+            return item->_data->_weight;
+        }
+    }
+
+    /** @return creator of the item; explicit items count as original */
+    virtual juniper::ItemCreator Creator(const juniper::QueryItem* item) const
+    {
+        // cast master: Knut Omang
+        if (item->_si != NULL) {
+            return (juniper::ItemCreator) item->_si->getCreator();
+        } else {
+            return juniper::CREA_ORIG;
+        }
+    }
+
+    /**
+     * @param len set to the length of the returned index name
+     * @return index name of the item (not necessarily zero-terminated
+     *         at len)
+     **/
+    virtual const char *Index(const juniper::QueryItem* item, size_t *len) const
+    {
+        if (item->_si != NULL) {
+            const char *ret;
+            item->_si->getIndexName(&ret, len);
+            return ret;
+        } else {
+            // report the length on this path too, matching UsefulIndex()
+            *len = item->_data->_indexlen;
+            return item->_data->_index;
+        }
+    }
+
+    /**
+     * @return true if terms in the index of this item should be used;
+     *         always true when there is no keyword extractor
+     **/
+    virtual bool UsefulIndex(const juniper::QueryItem* item) const
+    {
+        const char *buf;
+        size_t buflen;
+
+        if (_kwExtractor == NULL)
+            return true;
+
+        if (item->_si != NULL) {
+            item->_si->getIndexName(&buf, &buflen);
+        } else {
+            buf = item->_data->_index;
+            buflen = item->_data->_indexlen;
+        }
+        return _kwExtractor->IsLegalIndex(buf, buflen);
+    }
+};
+
+
+
+/**
+ * Walks the query stack dump and reports each item to the visitor,
+ * followed by the highlight terms (if any). When highlight terms are
+ * present, an enclosing AND with two children is reported first. When
+ * the visitor rejects an operator, its sub-tree is skipped.
+ *
+ * The highlight terms are optional: the constructor defaults them to
+ * NULL, in which case only the stack dump is traversed.
+ *
+ * @return false if an unhandled item type was met or the stack dump
+ *         ended in the middle of a skipped sub-tree
+ **/
+bool
+JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const
+{
+    bool rc = true;
+    search::SimpleQueryStackDumpIterator iterator(_buf);
+    juniper::QueryItem item(&iterator);
+    const char *buf;
+    size_t buflen;
+    const bool haveHighlightTerms = (_highlightTerms != NULL);
+
+    if (haveHighlightTerms && _highlightTerms->numKeys() > 0) {
+        v->VisitAND(&item, 2);
+    }
+    while (rc && iterator.next()) {
+        bool isSpecialToken = search::ParseItem::getFlag(iterator.getFlags(), search::ParseItem::IFLAG_SPECIALTOKEN);
+        switch (iterator.getType()) {
+        case search::ParseItem::ITEM_OR:
+        case search::ParseItem::ITEM_WEAK_AND:
+        case search::ParseItem::ITEM_EQUIV:
+        case search::ParseItem::ITEM_WORD_ALTERNATIVES:
+            // XXX unhandled
+            // case search::ParseItem::ITEM_WAND:
+            // case search::ParseItem::ITEM_WEIGHTED_SET:
+            // case search::ParseItem::ITEM_DOT_PRODUCT:
+            if (!v->VisitOR(&item, iterator.getArity()))
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_AND:
+            if (!v->VisitAND(&item, iterator.getArity()))
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_NOT:
+            if (!v->VisitANDNOT(&item, iterator.getArity()))
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_RANK:
+            if (!v->VisitRANK(&item, iterator.getArity()))
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_TERM:
+        case search::ParseItem::ITEM_EXACTSTRINGTERM:
+        case search::ParseItem::ITEM_PURE_WEIGHTED_STRING:
+            // XXX unhandled
+            // case search::ParseItem::ITEM_PURE_WEIGHTED_LONG:
+            iterator.getTerm(&buf, &buflen);
+            v->VisitKeyword(&item, buf, buflen, false, isSpecialToken);
+            break;
+        case search::ParseItem::ITEM_NUMTERM:
+            iterator.getTerm(&buf, &buflen);
+            {
+                // a number split into several parts is reported as a phrase
+                vespalib::string termStr(buf, buflen);
+                queryeval::SplitFloat splitter(termStr);
+                if (splitter.parts() > 1) {
+                    if (v->VisitPHRASE(&item, splitter.parts())) {
+                        for (size_t i = 0; i < splitter.parts(); ++i) {
+                            v->VisitKeyword(&item,
+                                            splitter.getPart(i).c_str(),
+                                            splitter.getPart(i).size(), false);
+                        }
+                    }
+                } else if (splitter.parts() == 1) {
+                    v->VisitKeyword(&item,
+                                    splitter.getPart(0).c_str(),
+                                    splitter.getPart(0).size(), false);
+                } else {
+                    v->VisitKeyword(&item, buf, buflen, false, true);
+                }
+            }
+            break;
+        case search::ParseItem::ITEM_PHRASE:
+            if (!v->VisitPHRASE(&item, iterator.getArity()))
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_PAREN:
+            if (!v->VisitOther(&item, iterator.getArity()))
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_PREFIXTERM:
+        case search::ParseItem::ITEM_SUBSTRINGTERM:
+            // XXX unhandled
+            // case search::ParseItem::ITEM_SUFFIXTERM:
+            iterator.getTerm(&buf, &buflen);
+            v->VisitKeyword(&item, buf, buflen, true, isSpecialToken);
+            break;
+        case search::ParseItem::ITEM_ANY:
+#if (JUNIPER_RP_API_MINOR_VERSION >= 1)
+            if (!v->VisitANY(&item, iterator.getArity()))
+#else
+            if (!v->VisitOR(&item, iterator.getArity()))
+#endif
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_NEAR:
+            if (!v->VisitNEAR(&item, iterator.getArity(),iterator.getArg1()))
+                rc = SkipItem(&iterator);
+            break;
+        case search::ParseItem::ITEM_ONEAR:
+            if (!v->VisitWITHIN(&item, iterator.getArity(),iterator.getArg1()))
+                rc = SkipItem(&iterator);
+            break;
+        // XXX unhandled
+        // case search::ParseItem::ITEM_REGEXP:
+        // case search::ParseItem::ITEM_PREDICATE_QUERY:
+        default:
+            rc = false;
+        }
+    }
+
+    if (haveHighlightTerms) {
+        // several highlight properties are combined with AND
+        if (_highlightTerms->numKeys() > 1) {
+            v->VisitAND(&item, _highlightTerms->numKeys());
+        }
+        fef::TermVisitor tv(v);
+        _highlightTerms->visitProperties(tv);
+    }
+
+    return rc;
+}
+
+/**
+ * Creates a field writer using the given juniper instance. The input
+ * and language field enum values start out as (uint32_t)-1 and the
+ * juniper config is empty until Init() is called.
+ **/
+JuniperDFW::JuniperDFW(juniper::Juniper * juniper)
+    : _inputFieldEnumValue(static_cast<uint32_t>(-1)),
+      _juniperConfig(),
+      _langFieldEnumValue(static_cast<uint32_t>(-1)),
+      _juniper(juniper)
+{}
+
+
+/** Nothing to release explicitly. */
+JuniperDFW::~JuniperDFW() {}
+
+/**
+ * Looks up the language and input fields in the field name enum and
+ * creates the juniper config for the output field.
+ *
+ * @param fieldName     field to create the juniper config for
+ * @param langFieldName language field name, may be NULL
+ * @param config        result config providing the field name enum
+ * @param inputField    name of the field used as juniper input
+ * @return false if the juniper config could not be created; an
+ *         unknown input field only triggers a warning
+ **/
+bool
+JuniperDFW::Init(
+        const char *fieldName,
+        const char *langFieldName,
+        const ResultConfig & config,
+        const char *inputField)
+{
+    const util::StringEnum & fieldEnum(config.GetFieldNameEnum());
+    if (langFieldName != NULL) {
+        _langFieldEnumValue = fieldEnum.Lookup(langFieldName);
+    }
+
+    _juniperConfig = _juniper->CreateConfig(fieldName);
+    const bool haveConfig = (_juniperConfig.get() != NULL);
+    if (!haveConfig) {
+        LOG(warning, "could not create juniper config for field '%s'", fieldName);
+    }
+
+    _inputFieldEnumValue = fieldEnum.Lookup(inputField);
+    if (_inputFieldEnumValue >= fieldEnum.GetNumEntries()) {
+        LOG(warning, "no docsum format contains field '%s'; dynamic teasers will be empty",
+            inputField);
+    }
+    return haveConfig;
+}
+
+/**
+ * Performs the base class initialization and then verifies that, in
+ * every result class containing the input field, the field type is
+ * runtime compatible with string or data.
+ *
+ * @return false if base initialization failed or a result class has
+ *         the input field with an unusable type
+ **/
+bool
+JuniperTeaserDFW::Init(
+        const char *fieldName,
+        const char *langFieldName,
+        const ResultConfig & config,
+        const char *inputField)
+{
+    if (!JuniperDFW::Init(fieldName, langFieldName, config, inputField)) {
+        return false;
+    }
+
+    for (ResultConfig::const_iterator it(config.begin()), mt(config.end()); it != mt; it++) {
+        const ResConfigEntry *inputEntry =
+            it->GetEntry(it->GetIndexFromEnumValue(_inputFieldEnumValue));
+        if (inputEntry == NULL) {
+            continue; // field not part of this class
+        }
+        if (IsRuntimeCompatible(inputEntry->_type, RES_STRING) ||
+            IsRuntimeCompatible(inputEntry->_type, RES_DATA))
+        {
+            continue;
+        }
+        LOG(warning, "cannot use docsum field '%s' as input to dynamicteaser; bad type in result class %d (%s)",
+            inputField, it->GetClassID(), it->GetClassName());
+        return false;
+    }
+    return true;
+}
+
+vespalib::string // Returns the juniper teaser text for the document, or an empty string when no teaser is available.
+DynamicTeaserDFW::makeDynamicTeaser(uint32_t docid,
+ GeneralResult *gres, // unpacked docsum holding the input field; dereferenced when a new analysis is needed
+ GetDocsumsState *state) // carries the cached query handle and analysis result
+{
+ if (state->_dynteaser._query == NULL) { // create the juniper query handle on first use and keep it in the state
+ JuniperQueryAdapter iq(state->_kwExtractor,
+ state->_args.getStackDump(),
+ &state->_args.highlightTerms());
+ state->_dynteaser._query = _juniper->CreateQueryHandle(iq, NULL);
+ }
+
+ if (docid != state->_dynteaser._docid || // the cached analysis is reused only for same doc, input field, language field and a compatible config
+ _inputFieldEnumValue != state->_dynteaser._input ||
+ _langFieldEnumValue != state->_dynteaser._lang ||
+ !juniper::AnalyseCompatible(_juniperConfig.get(), state->_dynteaser._config)) {
+ LOG(debug, "makeDynamicTeaser: docid (%d,%d), fieldenum (%d,%d), lang (%d,%d) analyse %s",
+ docid, state->_dynteaser._docid,
+ _inputFieldEnumValue, state->_dynteaser._input,
+ _langFieldEnumValue, state->_dynteaser._lang,
+ (juniper::AnalyseCompatible(_juniperConfig.get(), state->_dynteaser._config) ? "no" : "yes"));
+
+ if (state->_dynteaser._result != NULL) // release the previous analysis before replacing it
+ juniper::ReleaseResult(state->_dynteaser._result);
+
+ state->_dynteaser._docid = docid;
+ state->_dynteaser._input = _inputFieldEnumValue;
+ state->_dynteaser._lang = _langFieldEnumValue;
+ state->_dynteaser._config = _juniperConfig.get();
+ state->_dynteaser._result = NULL;
+
+ int idx = gres->GetClass()->GetIndexFromEnumValue(_inputFieldEnumValue);
+ ResEntry *entry = gres->GetEntry(idx); // NOTE(review): presumably NULL when the class lacks the input field - confirm
+
+ if (entry != NULL &&
+ state->_dynteaser._query != NULL) {
+
+ // obtain Juniper input
+ const char *buf;
+ uint32_t buflen;
+
+ entry->_resolve_field(&buf, &buflen,
+ &state->_docSumFieldSpace);
+
+ if (LOG_WOULD_LOG(spam)) {
+ std::ostringstream hexDump;
+ hexDump << vespalib::HexDump(buf, buflen);
+ LOG(spam, "makeDynamicTeaser: docid=%d, input='%s', hexdump:\n%s",
+ docid, std::string(buf, buflen).c_str(),
+ hexDump.str().c_str());
+ }
+
+ uint32_t langid = static_cast<uint32_t>(-1); // language id is always passed as (uint32_t)-1 here; _langFieldEnumValue is not consulted
+
+ state->_dynteaser._result =
+ juniper::Analyse(_juniperConfig.get(), state->_dynteaser._query,
+ buf, buflen, docid, _inputFieldEnumValue, langid);
+ }
+ }
+
+ juniper::Summary *teaser = (state->_dynteaser._result != NULL) // no analysis result means no teaser
+ ? juniper::GetTeaser(state->_dynteaser._result, _juniperConfig.get())
+ : NULL;
+
+ if (LOG_WOULD_LOG(debug)) {
+ std::ostringstream hexDump;
+ if (teaser != NULL) {
+ hexDump << vespalib::HexDump(teaser->Text(), teaser->Length());
+ }
+ LOG(debug, "makeDynamicTeaser: docid=%d, teaser='%s', hexdump:\n%s",
+ docid, (teaser != NULL ? std::string(teaser->Text(), teaser->Length()).c_str() : "NULL"),
+ hexDump.str().c_str());
+ }
+
+ if (teaser != NULL) {
+ return vespalib::string(teaser->Text(), // copy the text out of the juniper summary
+ teaser->Length());
+ } else {
+ return vespalib::string();
+ }
+}
+
+/**
+ * Writes the dynamic teaser for the document to the target buffer,
+ * as long data when the field type is binary compatible with long
+ * string and as short data otherwise.
+ *
+ * @return value returned by the docsum format helper
+ **/
+uint32_t
+DynamicTeaserDFW::WriteField(uint32_t docid,
+                             GeneralResult *gres,
+                             GetDocsumsState *state,
+                             ResType type,
+                             search::RawBuf *target)
+{
+    vespalib::string teaserText = makeDynamicTeaser(docid, gres, state);
+
+    if (IsBinaryCompatible(type, RES_LONG_STRING)) {
+        return DocsumFormat::addLongData(*target, teaserText.c_str(), teaserText.size());
+    }
+    return DocsumFormat::addShortData(*target, teaserText.c_str(), teaserText.size());
+}
+
+/**
+ * Inserts the dynamic teaser for the document as a slime string.
+ * The field type is not used.
+ **/
+void
+DynamicTeaserDFW::insertField(uint32_t docid,
+                              GeneralResult *gres,
+                              GetDocsumsState *state,
+                              ResType,
+                              vespalib::slime::Inserter &target)
+{
+    vespalib::string teaserText = makeDynamicTeaser(docid, gres, state);
+    target.insertString(vespalib::slime::Memory(teaserText.c_str(), teaserText.size()));
+}
+
+} // namespace docsummary
+} // namespace search
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp
new file mode 100644
index 00000000000..e15f0b8c986
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp
@@ -0,0 +1,195 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.docsummary.geoposdfw");
+#include "geoposdfw.h"
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/common/documentlocations.h>
+#include <vespa/searchlib/common/location.h>
+#include <vespa/vespalib/util/jsonwriter.h>
+
+namespace search {
+namespace docsummary {
+
+using attribute::IAttributeVector;
+using attribute::IAttributeContext;
+
+// Creates a writer for the attribute named 'attrName'; the name is handed
+// straight to the AttrDFW base class.
+GeoPositionDFW::GeoPositionDFW(const vespalib::string & attrName)
+    : AttrDFW(attrName)
+{
+}
+
+namespace {
+
+// Decodes a z-curve encoded position and inserts it as an object with the
+// fields "y" and "x". The decoded pair (x == 0, y == INT_MIN) is treated as
+// an empty value: it is only logged and nothing is inserted.
+void fmtZcurve(int64_t zval, vespalib::slime::Inserter &target)
+{
+    int32_t x = 0;
+    int32_t y = 0;
+    vespalib::geo::ZCurve::decode(zval, &x, &y);
+
+    const bool isEmpty = (x == 0) && (y == INT_MIN);
+    if (isEmpty) {
+        LOG(spam, "skipping empty zcurve value");
+        return;
+    }
+    vespalib::slime::Cursor &position = target.insertObject();
+    position.setLong("y", y);
+    position.setLong("x", x);
+}
+
+// Decodes a z-curve encoded position and writes it to 'json' as an object
+// with the fields "y" and "x".
+//
+// The writer is taken by reference: it is an in-out parameter, and the
+// caller keeps writing to the same writer afterwards (more array elements or
+// a following "weight" key). Writing through a copy would leave the caller's
+// writer unaware that an object had been emitted.
+//
+// NOTE(review): unlike the slime overload, this one does not skip the
+// "empty" value (x == 0, y == INT_MIN) - confirm whether that is intended.
+void fmtZcurve(int64_t zval, vespalib::JSONWriter &json)
+{
+    int32_t docx = 0;
+    int32_t docy = 0;
+    vespalib::geo::ZCurve::decode(zval, &docx, &docy);
+    json.beginObject();
+    json.appendKey("y"); json.appendInt64(docy);
+    json.appendKey("x"); json.appendInt64(docx);
+    json.endObject();
+}
+
+} // namespace <unnamed>
+
+// Renders the position value(s) stored for 'docid' in 'attribute' as JSON text.
+//
+//  - multi-value, weighted set: an array of {"item": <position>, "weight": <w>}
+//  - multi-value, otherwise:    an array of <position>
+//  - single value:              a single <position>
+//
+// where <position> is what fmtZcurve() emits for the stored z-curve value.
+vespalib::asciistream
+GeoPositionDFW::formatField(const IAttributeVector & attribute, uint32_t docid)
+{
+    vespalib::asciistream target;
+    vespalib::JSONWriter json(target);
+
+    if (attribute.hasMultiValue()) {
+        uint32_t entries = attribute.getValueCount(docid);
+        LOG(debug, "docid=%u, entries=%u", docid, entries);
+        json.beginArray();
+        if (attribute.hasWeightedSetType()) {
+            std::vector<IAttributeVector::WeightedInt> elements(entries);
+            // data() is valid for an empty vector, &elements[0] is not.
+            entries = attribute.get(docid, elements.data(), entries);
+            // get() may report more values than the buffer holds (the branch
+            // below handles the same situation); never read past the buffer.
+            if (entries > elements.size()) {
+                entries = static_cast<uint32_t>(elements.size());
+            }
+            for (uint32_t i = 0; i < entries; ++i) {
+                json.beginObject();
+                int64_t pos = elements[i].getValue();
+                json.appendKey("item");
+                fmtZcurve(pos, json);
+                json.appendKey("weight");
+                json.appendInt64(elements[i].getWeight());
+                json.endObject();
+            }
+        } else {
+            std::vector<IAttributeVector::largeint_t> elements(16);
+            uint32_t numValues = attribute.get(docid, elements.data(), elements.size());
+            if (numValues > elements.size()) {
+                // Initial guess was too small; retry with the reported size.
+                elements.resize(numValues);
+                numValues = attribute.get(docid, elements.data(), elements.size());
+                assert(numValues <= elements.size());
+            }
+            LOG(debug, "docid=%u, numValues=%u", docid, numValues);
+            for (uint32_t i = 0; i < numValues; i++) {
+                int64_t pos = elements[i];
+                fmtZcurve(pos, json);
+            }
+        }
+        // Close the array opened above so the output is balanced JSON.
+        json.endArray();
+    } else {
+        int64_t pos = attribute.getInt(docid);
+        LOG(debug, "docid=%u, pos=%lld", docid, static_cast<long long>(pos));
+        fmtZcurve(pos, json);
+    }
+    return target;
+}
+
+// Inserts the position value(s) stored for 'docid' into 'target' as slime data.
+//
+//  - multi-value, weighted set: an array of objects with "item" (the position,
+//    omitted when fmtZcurve() skips it as empty) and "weight"
+//  - multi-value, otherwise:    an array of positions
+//  - single value:              a single position
+//
+// The GeneralResult and ResType arguments are not used.
+void
+GeoPositionDFW::insertField(uint32_t docid,
+                            GeneralResult *,
+                            GetDocsumsState * dsState,
+                            ResType,
+                            vespalib::slime::Inserter &target)
+{
+    using vespalib::slime::Cursor;
+    using vespalib::slime::ObjectInserter;
+    using vespalib::slime::ArrayInserter;
+
+    const IAttributeVector & attribute = vec(*dsState);
+    if (attribute.hasMultiValue()) {
+        uint32_t entries = attribute.getValueCount(docid);
+        Cursor &arr = target.insertArray();
+        if (attribute.hasWeightedSetType()) {
+            std::vector<IAttributeVector::WeightedInt> elements(entries);
+            // data() is valid for an empty vector, &elements[0] is not.
+            entries = attribute.get(docid, elements.data(), entries);
+            // get() may report more values than the buffer holds (the branch
+            // below handles the same situation); never read past the buffer.
+            if (entries > elements.size()) {
+                entries = static_cast<uint32_t>(elements.size());
+            }
+            for (uint32_t i = 0; i < entries; ++i) {
+                Cursor &elem = arr.addObject();
+                int64_t pos = elements[i].getValue();
+                ObjectInserter obj(elem, "item");
+                fmtZcurve(pos, obj);
+                elem.setLong("weight", elements[i].getWeight());
+            }
+        } else {
+            std::vector<IAttributeVector::largeint_t> elements(16);
+            uint32_t numValues = attribute.get(docid, elements.data(), elements.size());
+            if (numValues > elements.size()) {
+                // Initial guess was too small; retry with the reported size.
+                elements.resize(numValues);
+                numValues = attribute.get(docid, elements.data(), elements.size());
+                assert(numValues <= elements.size());
+            }
+            for (uint32_t i = 0; i < numValues; i++) {
+                int64_t pos = elements[i];
+                ArrayInserter obj(arr);
+                fmtZcurve(pos, obj);
+            }
+        }
+    } else {
+        int64_t pos = attribute.getInt(docid);
+        fmtZcurve(pos, target);
+    }
+}
+
+// Appends the JSON text produced by formatField() to 'target' as a
+// length-prefixed string: a 32-bit length for types that are binary
+// compatible with RES_LONG_STRING, a 16-bit length otherwise.
+// Returns the number of bytes appended to 'target'.
+uint32_t
+GeoPositionDFW::WriteField(uint32_t docid,
+                           GeneralResult *,
+                           GetDocsumsState * dsState,
+                           ResType type,
+                           search::RawBuf * target)
+{
+    int str_len_ofs = target->GetUsedLen();
+
+    vespalib::asciistream val(formatField(vec(*dsState), docid));
+
+    bool isLong = IsBinaryCompatible(type, RES_LONG_STRING);
+    if (isLong) {
+        uint32_t str_len_32 = val.size();
+        target->append(&str_len_32, sizeof(str_len_32));
+        target->append(val.c_str(), str_len_32);
+    } else {
+        // Clamp instead of letting the conversion to 16 bits wrap around:
+        // a value of 65536 + k bytes must not be stored as a k byte string.
+        uint16_t str_len_16 = (val.size() > 0xffffu)
+                              ? static_cast<uint16_t>(0xffffu)
+                              : static_cast<uint16_t>(val.size());
+        target->append(&str_len_16, sizeof(str_len_16));
+        target->append(val.c_str(), str_len_16);
+    }
+    // calculate number of bytes written
+    uint32_t written = target->GetUsedLen() - str_len_ofs;
+    return written;
+}
+
+// Factory for GeoPositionDFW.
+//
+// When an attribute manager is given, the attribute name must be non-NULL and
+// must resolve to an attribute through a context created by the manager;
+// otherwise a warning is logged and an empty pointer is returned.
+// When no attribute manager is given, no validation is performed.
+//
+// NOTE(review): with a NULL manager a NULL attribute_name is passed on to the
+// constructor unchecked - confirm that callers never do this.
+GeoPositionDFW::UP
+GeoPositionDFW::create(const char *attribute_name,
+                       IAttributeManager *attribute_manager)
+{
+    GeoPositionDFW::UP ret;
+    if (attribute_manager != NULL) {
+        if (!attribute_name) {
+            // The %p conversion requires a void pointer argument.
+            LOG(warning, "create: missing attribute name '%p'",
+                static_cast<const void *>(attribute_name));
+            return ret;
+        }
+        IAttributeContext::UP context = attribute_manager->createContext();
+        if (!context.get()) {
+            LOG(warning, "create: could not create context from attribute manager");
+            return ret;
+        }
+        const IAttributeVector *attribute = context->getAttribute(attribute_name);
+        if (!attribute) {
+            LOG(warning, "create: could not get attribute '%s' from context", attribute_name);
+            return ret;
+        }
+    }
+    ret.reset(new GeoPositionDFW(attribute_name));
+    return ret;
+}
+
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h
new file mode 100644
index 00000000000..91d834d45a7
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/attributedfw.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Docsum field writer that extracts positions from a zcurve attribute.
+ * WriteField() emits the position(s) as a length-prefixed JSON string,
+ * insertField() emits them as structured slime data.
+ **/
+class GeoPositionDFW : public AttrDFW
+{
+private:
+ // Formats the value(s) of 'docid' in attribute 'v' as JSON text; used by WriteField().
+ vespalib::asciistream formatField(const attribute::IAttributeVector & v, uint32_t docid);
+public:
+ typedef std::unique_ptr<GeoPositionDFW> UP;
+ // Creates a writer reading from the attribute named 'attrName'.
+ GeoPositionDFW(const vespalib::string & attrName);
+ // Appends the formatted position(s) to 'target'; returns the number of bytes appended.
+ virtual uint32_t WriteField(uint32_t docid,
+ GeneralResult * gres,
+ GetDocsumsState * state,
+ ResType type,
+ search::RawBuf * target);
+ // Inserts the position(s) into 'target' as slime objects/arrays.
+ virtual void insertField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+ // Factory; returns an empty pointer when an attribute manager is given
+ // but the named attribute cannot be resolved through it.
+ static UP create(const char *attribute_name,
+ IAttributeManager *attribute_manager);
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp
new file mode 100644
index 00000000000..1316a370a7c
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchsummary/docsummary/getdocsumargs.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+
+namespace search {
+namespace docsummary {
+
+// Creates an argument set in its default state: empty strings, stack dump and
+// properties, zeroed counters and flags, no location, and a 30 second timeout.
+GetDocsumArgs::GetDocsumArgs() :
+    _ranking(),
+    _qflags(0),
+    _resultClassName(),
+    _stackItems(0),
+    _stackDump(),
+    _location(),
+    _timeout(30 * fastos::TimeStamp::SEC),
+    _flags(0u),
+    _propertiesMap(),
+    _isLocationSet(false)
+{
+}
+
+
+// Nothing to release explicitly; all members clean up after themselves.
+GetDocsumArgs::~GetDocsumArgs() { }
+
+// Replaces the stored timeout with 'timeout'.
+void GetDocsumArgs::setTimeout(const fastos::TimeStamp & timeout)
+{
+    _timeout = timeout;
+}
+
+// Returns a copy of the stored timeout.
+fastos::TimeStamp GetDocsumArgs::getTimeout() const
+{
+    return _timeout;
+}
+
+
+// Restores every member to the state set up by the default constructor.
+void
+GetDocsumArgs::Reset()
+{
+    // Strings and buffers.
+    _ranking.clear();
+    _resultClassName.clear();
+    _stackDump.clear();
+    _location.clear();
+
+    // Scalars.
+    _qflags = 0;
+    _stackItems = 0;
+    _flags = 0;
+    _isLocationSet = false;
+    _timeout = 30 * fastos::TimeStamp::SEC;
+
+    // Swap with a fresh map so the old contents are destroyed right here.
+    PropsMap fresh;
+    std::swap(_propertiesMap, fresh);
+}
+
+
+// Copies all arguments from 'src' into this object. Copying an object onto
+// itself is a no-op.
+//
+// The assignment goes from the source to this object; assigning in the other
+// direction would overwrite the source and leave this object unchanged.
+void
+GetDocsumArgs::Copy(GetDocsumArgs *src)
+{
+    if (src == this) {
+        return;
+    }
+    *this = *src;
+}
+
+// Fills this object from a docsum request. The timeout is taken from the
+// request's remaining time, and the location counts as set when the
+// request's location string is non-empty.
+void
+GetDocsumArgs::initFromDocsumRequest(const search::engine::DocsumRequest &req)
+{
+    // Query identification and flags.
+    _ranking         = req.ranking;
+    _qflags          = req.queryFlags;
+    _resultClassName = req.resultClassName;
+    _flags           = req._flags;
+
+    // Query stack.
+    _stackItems = req.stackItems;
+    _stackDump  = req.stackDump;
+
+    // Location.
+    _location      = req.location;
+    _isLocationSet = !(_location.size() == 0);
+
+    _timeout       = req.getTimeLeft();
+    _propertiesMap = req.propertiesMap;
+}
+
+// Stores the number of stack items and a private copy of the 'stackDumpLen'
+// bytes starting at 'stackDump'.
+//
+// assign() is used instead of resize() + memcpy(&_stackDump[0], ...) so that a
+// zero-length dump neither indexes into an empty vector nor hands a possibly
+// NULL pointer to memcpy.
+void
+GetDocsumArgs::SetStackDump(uint32_t stackItems, uint32_t stackDumpLen, const char *stackDump)
+{
+    _stackItems = stackItems;
+    _stackDump.assign(stackDump, stackDump + stackDumpLen);
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h
new file mode 100644
index 00000000000..4b4a9e29f02
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2003 Overture Services Norway AS
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA
+
+#pragma once
+
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchlib/engine/docsumrequest.h>
+#include <vespa/searchlib/engine/propertiesmap.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Holds the arguments of a docsum request: rank profile, query flags, result
+ * class name, query stack dump, location, timeout, flags and property maps.
+ **/
+class GetDocsumArgs
+{
+public:
+    typedef engine::PropertiesMap PropsMap;
+
+private:
+    vespalib::string   _ranking;
+    uint32_t           _qflags;
+    vespalib::string   _resultClassName;
+    uint32_t           _stackItems;
+    std::vector<char>  _stackDump;
+    vespalib::string   _location;
+    fastos::TimeStamp  _timeout;
+    uint32_t           _flags;
+    PropsMap           _propertiesMap;
+
+    bool               _isLocationSet;
+
+public:
+    GetDocsumArgs();
+    ~GetDocsumArgs();
+
+    void Reset();
+    void Copy(GetDocsumArgs *src);
+    void initFromDocsumRequest(const search::engine::DocsumRequest &req);
+
+    void SetRankProfile(const vespalib::string &ranking) { _ranking = ranking; }
+    void SetQueryFlags(uint32_t qflags)                  { _qflags = qflags; }
+    void SetResultClassName(uint32_t len, const char *name) {
+        _resultClassName.assign(name, len);
+    }
+    void setResultClassName(const vespalib::stringref & name) { _resultClassName = name; }
+    void SetStackDump(uint32_t stackItems,
+                      uint32_t stackDumpLen, const char *stackDump);
+    // A NULL 'location' marks the location as not set; the previously stored
+    // location string is left untouched in that case.
+    void SetLocation(uint32_t locationLen, const char *location) {
+        if ((_isLocationSet = (location != NULL))) {
+            _location.assign(location, locationLen);
+        }
+    }
+
+    void
+    setFlags(uint32_t flags)
+    {
+        _flags = flags;
+    }
+
+    void setTimeout(const fastos::TimeStamp & timeout);
+    fastos::TimeStamp getTimeout() const;
+
+    const vespalib::string & getRankProfile() const     { return _ranking; }
+    const vespalib::string & getResultClassName() const { return _resultClassName; }
+    const vespalib::string & getLocation() const        { return _location; }
+    // Returns a view of the stored stack dump; it refers to memory owned by
+    // this object. data() is used because indexing element 0 of an empty
+    // vector is not allowed.
+    const vespalib::stringref getStackDump() const {
+        return vespalib::stringref(_stackDump.data(), _stackDump.size());
+    }
+
+    uint32_t GetQueryFlags() const  { return _qflags; }
+    uint32_t GetStackItems() const  { return _stackItems; }
+    uint32_t GetLocationLen() const { return _location.size(); }
+    uint32_t getFlags() const       { return _flags; }
+
+    const PropsMap &propertiesMap() const { return _propertiesMap; }
+
+    const search::fef::Properties &rankProperties() const {
+        return _propertiesMap.rankProperties();
+    }
+    const search::fef::Properties &featureOverrides() const {
+        return _propertiesMap.featureOverrides();
+    }
+    const search::fef::Properties &highlightTerms() const {
+        return _propertiesMap.highlightTerms();
+    }
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/idocsumenvironment.h b/searchsummary/src/vespa/searchsummary/docsummary/idocsumenvironment.h
new file mode 100644
index 00000000000..546acd24f3f
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/idocsumenvironment.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/juniper/rpinterface.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Abstract view of information available to rewriters for generating docsum fields.
+ **/
+class IDocsumEnvironment {
+public:
+ // Returns the attribute manager; ownership is not expressed by this interface.
+ virtual search::IAttributeManager * getAttributeManager() = 0;
+ // Resolves the index name 's'. KeywordExtractor::IsLegalIndex() treats an
+ // empty result as "not a legal index".
+ virtual vespalib::string lookupIndex(const vespalib::string & s) const = 0;
+ // Returns the juniper instance; ownership is not expressed by this interface.
+ virtual juniper::Juniper * getJuniper() = 0;
+ virtual ~IDocsumEnvironment() {}
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/itokenizer.h b/searchsummary/src/vespa/searchsummary/docsummary/itokenizer.h
new file mode 100644
index 00000000000..8d221597480
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/itokenizer.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Interface for a tokenizer.
+ * A tokenizer is reset with a text buffer and then queried for tokens with
+ * hasMoreTokens() / getNextToken().
+ */
+class ITokenizer
+{
+public:
+ /**
+ * Representation of a token with type and text and optional stemmed variant.
+ * The text and stem are stored as references (pointer + length) into buffers
+ * supplied by the creator; the token does not copy them.
+ */
+ class Token
+ {
+ public:
+ enum Type {
+ WORD, // Fast_UnicodeUtil::IsWordChar() returns true
+ NON_WORD, // Fast_UnicodeUtil::IsWordChar() returns false
+ PUNCTUATION, // Fast_UnicodeUtil::IsTerminalPunctuationChar() returns true
+ ANNOTATION, // Interlinear annotation
+ NOT_DEF
+ };
+ private:
+ vespalib::stringref _text;
+ vespalib::stringref _stem;
+ Type _type;
+
+ public:
+ // Token without a stem: text is the range [textBegin, textEnd).
+ Token(const char * textBegin, const char * textEnd, Type type) :
+ _text(textBegin, textEnd - textBegin), _stem(), _type(type) {}
+ // Token with a stem: text is [textBegin, textEnd), stem is [stemBegin, stemEnd).
+ Token(const char * textBegin, const char * textEnd, const char * stemBegin, const char * stemEnd, Type type) :
+ _text(textBegin, textEnd - textBegin), _stem(stemBegin, stemEnd - stemBegin), _type(type) {}
+ const vespalib::stringref & getText() const { return _text; }
+ const vespalib::stringref & getStem() const { return _stem; }
+ // True when the stem reference has a non-NULL data pointer.
+ // NOTE(review): relies on a default-constructed stringref reporting NULL - confirm.
+ bool hasStem() const { return _stem.c_str() != NULL; }
+ Type getType() const { return _type; }
+ };
+
+ virtual ~ITokenizer() {}
+
+ /**
+ * Reset the tokenizer using the given buffer.
+ */
+ virtual void reset(const char * buf, size_t len) = 0;
+
+ /**
+ * Returns the size of the underlying buffer.
+ */
+ virtual size_t getBufferSize() const = 0;
+
+ /**
+ * Returns true if the text buffer has more tokens.
+ */
+ virtual bool hasMoreTokens() = 0;
+
+ /**
+ * Returns the next token from the text buffer.
+ */
+ virtual Token getNextToken() = 0;
+};
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h
new file mode 100644
index 00000000000..0717ba9ed6b
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+#include <vespa/searchsummary/docsummary/docsumstate.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+#include <vespa/vespalib/data/slime/inserter.h>
+#include "docsumfieldwriter.h"
+
+namespace search {
+namespace docsummary {
+
+// Common base class for docsum field writers that use juniper.
+// Instances cannot be copied: the copy constructor and assignment operator
+// are declared private.
+class JuniperDFW : public IDocsumFieldWriter
+{
+public:
+ // Initializes the writer for the given output field, language field and
+ // input field, using the supplied result configuration.
+ virtual bool Init(
+ const char *fieldName,
+ const char *langFieldName,
+ const ResultConfig & config,
+ const char *inputField);
+protected:
+ JuniperDFW(juniper::Juniper * juniper);
+ virtual ~JuniperDFW();
+
+ // Passed to juniper::Analyse() by DynamicTeaserDFW::makeDynamicTeaser().
+ uint32_t _inputFieldEnumValue;
+ // Juniper configuration owned by this writer.
+ std::unique_ptr<juniper::Config> _juniperConfig;
+ uint32_t _langFieldEnumValue;
+ // Juniper instance handed to the constructor; ownership is not visible here.
+ juniper::Juniper *_juniper;
+private:
+ virtual bool IsGenerated() const { return false; }
+ JuniperDFW(const JuniperDFW &);
+ JuniperDFW & operator=(const JuniperDFW &);
+};
+
+
+// Intermediate base class for teaser writers; overrides Init() and otherwise
+// forwards construction to JuniperDFW.
+class JuniperTeaserDFW : public JuniperDFW
+{
+public:
+ virtual bool Init(
+ const char *fieldName,
+ const char *langFieldName,
+ const ResultConfig & config,
+ const char *inputField);
+protected:
+ JuniperTeaserDFW(juniper::Juniper * juniper) : JuniperDFW(juniper) { }
+};
+
+
+// Field writer producing a dynamic teaser. Both WriteField() and
+// insertField() obtain the teaser text from makeDynamicTeaser().
+class DynamicTeaserDFW : public JuniperTeaserDFW
+{
+public:
+ DynamicTeaserDFW(juniper::Juniper * juniper) : JuniperTeaserDFW(juniper) { }
+
+ // Returns the teaser text for 'docid'; an empty string when no teaser is available.
+ vespalib::string makeDynamicTeaser(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state);
+
+ // Appends the teaser to 'target' as long or short string data depending on 'type'.
+ virtual uint32_t WriteField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ search::RawBuf *target);
+ // Inserts the teaser as a string value into 'target'.
+ virtual void insertField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+};
+
+} // namespace docsummary
+} // namespace search
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.cpp b/searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.cpp
new file mode 100644
index 00000000000..fc434998465
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.cpp
@@ -0,0 +1,112 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchcommon/config/subscriptionproxyng.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include "juniperproperties.h"
+
+using vespa::config::search::summary::JuniperrcConfig;
+
+namespace search {
+namespace docsummary {
+
+// Creates a property set holding only the built-in defaults from reset().
+JuniperProperties::JuniperProperties()
+    : _properties()
+{
+    reset();
+}
+
+// Creates a property set with the built-in defaults and then applies the
+// values from the given juniperrc config on top of them.
+JuniperProperties::JuniperProperties(const JuniperrcConfig &cfg)
+    : _properties()
+{
+    reset();
+    configure(cfg);
+}
+
+// Nothing to release explicitly; the property map cleans up after itself.
+JuniperProperties::~JuniperProperties()
+{
+}
+
+// Clears the property map and installs the built-in default values.
+//
+// The following properties are deliberately NOT given a default here (they
+// were disabled in the original list and are kept for reference):
+//   juniper.debug_mask          = "0"
+//   juniper.dynsum.connectors   = "\x1F\x1D"
+//   juniper.dynsum.length       = "256"
+//   juniper.dynsum.max_matches  = "3"
+//   juniper.dynsum.min_length   = "128"
+//   juniper.dynsum.separators   = "\x1F\x1D"
+//   juniper.dynsum.surround_max = "128"
+//   juniper.proximity.factor    = "0.25"
+//   juniper.stem.max_extend     = "3"
+//   juniper.stem.min_length     = "5"
+void
+JuniperProperties::reset()
+{
+    static const char * const defaults[][2] = {
+        { "juniper.dynsum.continuation",                 "\x1E"   },
+        { "juniper.dynsum.escape_markup",                "off"    },
+        { "juniper.dynsum.fallback",                     "prefix" },
+        { "juniper.dynsum.highlight_off",                "\x1F"   },
+        { "juniper.dynsum.highlight_on",                 "\x1F"   },
+        { "juniper.dynsum.preserve_white_space",         "on"     },
+        { "juniper.matcher.winsize",                     "200"    },
+        { "juniper.matcher.winsize_fallback_multiplier", "10.0"   },
+        { "juniper.matcher.max_match_candidates",        "1000"   }
+    };
+    const size_t numDefaults = sizeof(defaults) / sizeof(defaults[0]);
+
+    _properties.clear();
+    for (size_t i = 0; i < numDefaults; ++i) {
+        _properties[defaults[i][0]] = defaults[i][1];
+    }
+}
+
+// Config callback: resets to the built-in defaults and then maps the
+// juniperrc config onto juniper property names. Global settings are stored
+// under the "juniper." prefix, per-field overrides under "<fieldname>.".
+void
+JuniperProperties::configure(const JuniperrcConfig &cfg)
+{
+    using vespalib::make_vespa_string;
+
+    reset();
+
+    // Global settings.
+    _properties["juniper.dynsum.fallback"]     = cfg.prefix ? "prefix" : "none";
+    _properties["juniper.dynsum.length"]       = make_vespa_string("%d", cfg.length);
+    _properties["juniper.dynsum.max_matches"]  = make_vespa_string("%d", cfg.maxMatches);
+    _properties["juniper.dynsum.min_length"]   = make_vespa_string("%d", cfg.minLength);
+    _properties["juniper.dynsum.surround_max"] = make_vespa_string("%d", cfg.surroundMax);
+    _properties["juniper.matcher.winsize"]     = make_vespa_string("%d", cfg.winsize);
+    _properties["juniper.matcher.winsize_fallback_multiplier"] = make_vespa_string("%f", cfg.winsizeFallbackMultiplier);
+    _properties["juniper.matcher.max_match_candidates"] = make_vespa_string("%d", cfg.maxMatchCandidates);
+    _properties["juniper.stem.min_length"]     = make_vespa_string("%d", cfg.stemMinLength);
+    _properties["juniper.stem.max_extend"]     = make_vespa_string("%d", cfg.stemMaxExtend);
+
+    // Per-field overrides.
+    const uint32_t numOverrides = cfg.override.size();
+    for (uint32_t idx = 0; idx < numOverrides; ++idx) {
+        const JuniperrcConfig::Override &fieldCfg = cfg.override[idx];
+        const char *fieldName = fieldCfg.fieldname.c_str();
+        const vespalib::string dynsumPrefix  = make_vespa_string("%s.dynsum.", fieldName);
+        const vespalib::string matcherPrefix = make_vespa_string("%s.matcher.", fieldName);
+        const vespalib::string stemPrefix    = make_vespa_string("%s.stem.", fieldName);
+
+        _properties[dynsumPrefix + "fallback"]     = fieldCfg.prefix ? "prefix" : "none";
+        _properties[dynsumPrefix + "length"]       = make_vespa_string("%d", fieldCfg.length);
+        _properties[dynsumPrefix + "max_matches"]  = make_vespa_string("%d", fieldCfg.maxMatches);
+        _properties[dynsumPrefix + "min_length"]   = make_vespa_string("%d", fieldCfg.minLength);
+        _properties[dynsumPrefix + "surround_max"] = make_vespa_string("%d", fieldCfg.surroundMax);
+
+        _properties[matcherPrefix + "winsize"] = make_vespa_string("%d", fieldCfg.winsize);
+        _properties[matcherPrefix + "winsize_fallback_multiplier"] = make_vespa_string("%f", fieldCfg.winsizeFallbackMultiplier);
+        _properties[matcherPrefix + "max_match_candidates"] = make_vespa_string("%d", fieldCfg.maxMatchCandidates);
+
+        _properties[stemPrefix + "min_length"] = make_vespa_string("%d", fieldCfg.stemMinLength);
+        _properties[stemPrefix + "max_extend"] = make_vespa_string("%d", fieldCfg.stemMaxExtend);
+    }
+}
+
+// Subscribes to the juniperrc config identified by 'configId', with
+// configure() registered as the callback on this object.
+void
+JuniperProperties::subscribe(const char *configId)
+{
+    typedef SubscriptionProxyNg<JuniperProperties, JuniperrcConfig> Proxy;
+    Proxy proxy(*this, &JuniperProperties::configure);
+    proxy.subscribe(configId);
+}
+
+// Looks up the property 'name'. Returns the stored value (a pointer into the
+// string held by the property map) when present, otherwise 'def'.
+const char *
+JuniperProperties::GetProperty(const char *name, const char *def)
+{
+    typedef std::map<vespalib::string, vespalib::string>::const_iterator PropIter;
+    const PropIter found = _properties.find(name);
+    if (found == _properties.end()) {
+        return def;
+    }
+    return found->second.c_str();
+}
+
+// Stores 'val' under 'key', adding the entry or overwriting an existing one.
+void JuniperProperties::SetProperty(const vespalib::string &key, const vespalib::string &val)
+{
+    _properties[key] = val;
+}
+
+} // namespace docsummary
+} // namespace search
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.h b/searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.h
new file mode 100644
index 00000000000..5e9cdce48c7
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/juniperproperties.h
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/juniper/IJuniperProperties.h>
+#include <map>
+#include <vespa/searchsummary/config/config-juniperrc.h>
+#include <string>
+
+namespace search {
+namespace docsummary {
+
+// Juniper property provider backed by a name -> value string map. The map is
+// filled with built-in defaults and can be overlaid with values from a
+// juniperrc config.
+class JuniperProperties : public IJuniperProperties {
+private:
+ std::map<vespalib::string, vespalib::string> _properties;
+
+ /**
+ * Resets the property map to all default values. This is used for the empty constructor and also called before
+ * retrieving configured properties.
+ */
+ void reset();
+
+
+public:
+ /**
+ * Constructs a juniper property object with default values set.
+ */
+ JuniperProperties();
+ /**
+ * Constructs a juniper property object with default values set and then
+ * applies the given configuration on top of them.
+ *
+ * @param cfg The configuration object to apply.
+ */
+ JuniperProperties(const vespa::config::search::summary::JuniperrcConfig &cfg);
+
+ /**
+ * Destructor. Frees any allocated resources.
+ */
+ virtual ~JuniperProperties();
+
+ /**
+ * This method subscribes to config from the given configuration id. This does the necessary mapping from
+ * user-friendly configuration parameters to juniper specific properties. Note that no exceptions thrown by the
+ * configuration framework are caught in this method. Please refer to the config framework for details on what to
+ * expect.
+ *
+ * @param configId The config id to subscribe to.
+ */
+ void subscribe(const char *configId);
+
+ /**
+ * Implements configure callback for config subscription. The property map
+ * is reset to the defaults before the configured values are applied.
+ *
+ * @param cfg The configuration object.
+ */
+ void configure(const vespa::config::search::summary::JuniperrcConfig &cfg);
+
+ // Inherit doc from IJuniperProperties.
+ // Returns the stored value for 'name', or 'def' when the property is not set.
+ // The returned pointer refers to the string stored in this object's map.
+ const char *GetProperty(const char *name, const char *def = NULL);
+
+ /**
+ * Sets the value of a given named property. If the property already exists, it is overwritten. If it does not
+ * exist, it is added.
+ *
+ * @param key The name of the property to set.
+ * @param val The value to set for the property.
+ */
+ void SetProperty(const vespalib::string &key, const vespalib::string &val);
+};
+
+} // namespace docsummary
+} // namespace search
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp
new file mode 100644
index 00000000000..6d567f9a6da
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp
@@ -0,0 +1,233 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchsummary/docsummary/docsumstate.h>
+#include <vespa/searchsummary/docsummary/keywordextractor.h>
+
+
+LOG_SETUP(".searchlib.docsummary.keywordextractor");
+
+/** Tell us what parts of the query we are interested in */
+
+namespace search {
+namespace docsummary {
+
+
+// Only query items created as CREA_ORIG are considered useful for keyword
+// extraction; every other creator is ignored.
+bool useful(search::ParseItem::ItemCreator creator)
+{
+    return creator == search::ParseItem::CREA_ORIG;
+}
+
+
+// Creates an extractor with no legal index names or prefixes registered.
+// 'env' may be NULL; IsLegalIndex() then uses the index name as given.
+KeywordExtractor::KeywordExtractor(IDocsumEnvironment * env) :
+    _env(env),
+    _legalPrefixes(NULL),
+    _legalIndexes()
+{
+}
+
+
+// Deletes every node of the singly linked list of legal index prefixes.
+KeywordExtractor::~KeywordExtractor()
+{
+    IndexPrefix *node = _legalPrefixes;
+    while (node != NULL) {
+        IndexPrefix *following = node->_next;
+        delete node;
+        node = following;
+    }
+    _legalPrefixes = NULL;
+}
+
+
+// Parses a ';' separated list of index specifications and registers each
+// entry. An entry ending in '*' is registered as a prefix (without the '*'),
+// any other entry as an exact index name. Empty entries are skipped and a
+// NULL spec is ignored.
+void
+KeywordExtractor::AddLegalIndexSpec(const char *spec)
+{
+    if (spec == NULL) {
+        return;
+    }
+
+    const vespalib::string specText(spec);
+    size_t begin = 0;
+    while (begin < specText.size()) {
+        // The current entry ends at the next separator or at the end of the text.
+        size_t end = specText.find(';', begin);
+        if (end == vespalib::string::npos) {
+            end = specText.size();
+        }
+        if (end > begin) {
+            vespalib::string entry = specText.substr(begin, end - begin);
+            if (entry[entry.size() - 1] == '*') {
+                entry.resize(entry.size() - 1);
+                AddLegalIndexPrefix(entry.c_str());
+            } else {
+                AddLegalIndexName(entry.c_str());
+            }
+        }
+        begin = end + 1;
+    }
+}
+
+
+// Builds a ';' separated specification of everything registered as legal:
+// first all prefixes (each followed by '*') in list order, then all exact
+// index names in the iteration order of the set.
+vespalib::string
+KeywordExtractor::GetLegalIndexSpec()
+{
+    vespalib::string result;
+
+    const IndexPrefix *prefix = _legalPrefixes;
+    while (prefix != NULL) {
+        if (result.size() != 0) {
+            result.append(';');
+        }
+        result.append(prefix->_prefix);
+        result.append('*');
+        prefix = prefix->_next;
+    }
+
+    const Set::const_iterator last = _legalIndexes.end();
+    for (Set::const_iterator name = _legalIndexes.begin(); name != last; name++) {
+        if (result.size() != 0) {
+            result.append(';');
+        }
+        result.append(*name);
+    }
+    return result;
+}
+
+
+// Tells whether the index named by (idxName, idxNameLen) is legal.
+// With an environment the name is first resolved through lookupIndex();
+// without one the name is used as given, and an empty name is replaced by
+// "__defaultindex". A name that resolves to the empty string is never legal;
+// otherwise it is legal when it matches a registered prefix or exact name.
+bool
+KeywordExtractor::IsLegalIndex(const char *idxName, size_t idxNameLen) const
+{
+    const vespalib::string requested(idxName, idxNameLen);
+    vespalib::string resolved;
+
+    if (_env != NULL) {
+        resolved = _env->lookupIndex(requested);
+    } else if (requested.empty()) {
+        resolved = "__defaultindex";
+    } else {
+        resolved = requested;
+    }
+
+    if (resolved.empty()) {
+        return false;
+    }
+    const char *name = resolved.c_str();
+    return IsLegalIndexPrefix(name) || IsLegalIndexName(name);
+}
+
+
+// Extracts keywords from a serialized query stack.
+//
+// Walks the stack dump in 'buf' and collects the terms that belong to a legal
+// index (see IsLegalIndex()), are non-empty and were created as CREA_ORIG (see
+// useful()). Every keyword is stored NUL-terminated; the terms of a phrase
+// are joined with single spaces into one keyword. The list is ended by one
+// extra NUL byte (an empty keyword).
+//
+// Returns a malloc()ed copy of that keyword list, or NULL when the allocation
+// fails. Nothing in this function releases the buffer - presumably the
+// caller is expected to free() it; confirm against the callers.
+char *
+KeywordExtractor::ExtractKeywords(const vespalib::stringref &buf) const
+{
+ const char *str_ptr;
+ size_t str_len;
+ search::SimpleQueryStackDumpIterator si(buf);
+ char keywordstore[4096]; // Initial storage for keywords buffer
+ search::RawBuf keywords(keywordstore, sizeof(keywordstore));
+
+ while (si.next()) {
+ search::ParseItem::ItemCreator creator = si.getCreator();
+ switch (si.getType()) {
+ case search::ParseItem::ITEM_NOT:
+ /**
+ * @todo Must consider only the first argument on the stack.
+ * Difficult without recursion.
+ */
+ break;
+
+ case search::ParseItem::ITEM_PHRASE:
+ {
+ // Must take the next arity TERMS and put together
+ bool phraseterms_was_added = false;
+ int phraseterms = si.getArity();
+ for (int i = 0; i < phraseterms; i++) {
+ // NOTE(review): the result of next() is not checked here, so a
+ // stack dump that ends inside a phrase is not detected - confirm
+ // that the iterator is safe to query in that state.
+ si.next();
+ search::ParseItem::ItemType newtype = si.getType();
+ if (newtype != search::ParseItem::ITEM_TERM &&
+ newtype != search::ParseItem::ITEM_NUMTERM)
+ {
+ // stack syntax error
+ // LOG(debug, "Extracting keywords found a non-term in a phrase");
+ // making a clean escape.
+ keywords.reset();
+ goto iteratorloopend;
+ } else {
+ si.getIndexName(&str_ptr, &str_len);
+ // This 'continue' applies to the enclosing for loop: the term is
+ // skipped and the next term of the phrase is examined.
+ if (!IsLegalIndex(str_ptr, str_len))
+ continue;
+ // Found a term
+ si.getTerm(&str_ptr, &str_len);
+ search::ParseItem::ItemCreator term_creator = si.getCreator();
+ if (str_len > 0 && useful(term_creator)) {
+ // Actual term to add
+ if (phraseterms_was_added)
+ // Not the first term in the phrase
+ keywords += " ";
+ else
+ phraseterms_was_added = true;
+
+ keywords.append(str_ptr, str_len);
+ }
+ }
+ }
+ if (phraseterms_was_added)
+ // Terms were added, so 0-terminate the string
+ keywords.append("\0", 1);
+
+ break;
+ }
+ case search::ParseItem::ITEM_PREFIXTERM:
+ case search::ParseItem::ITEM_SUBSTRINGTERM:
+ case search::ParseItem::ITEM_EXACTSTRINGTERM:
+ case search::ParseItem::ITEM_NUMTERM:
+ case search::ParseItem::ITEM_TERM:
+ si.getIndexName(&str_ptr, &str_len);
+ // This 'continue' applies to the outer while loop: the item is
+ // skipped and the iterator moves on to the next stack item.
+ if (!IsLegalIndex(str_ptr, str_len))
+ continue;
+ // add a new keyword
+ si.getTerm(&str_ptr, &str_len);
+ if (str_len > 0 && useful(creator)) {
+ // An actual string to add
+ keywords.append(str_ptr, str_len);
+ keywords.append("\0", 1);
+ }
+ break;
+
+ default:
+ // Do nothing to AND, RANK, OR
+ break;
+ }
+ }
+ iteratorloopend:
+ // Add a 'blank' keyword
+ keywords.append("\0", 1);
+
+ // Must now allocate a string and copy the data from the rawbuf
+ void *result = malloc(keywords.GetUsedLen());
+ if (result != NULL) {
+ memcpy(result, keywords.GetDrainPos(), keywords.GetUsedLen());
+ }
+ return static_cast<char *>(result);
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h
new file mode 100644
index 00000000000..750cfb2cdee
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h
@@ -0,0 +1,164 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/vespalib/stllike/hash_set.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchsummary/docsummary/idocsumenvironment.h>
+
+namespace search {
+namespace docsummary {
+
+class KeywordExtractor
+{
+private:
+ KeywordExtractor(const KeywordExtractor &); // copying is disabled: declared private, no definition in this header
+ KeywordExtractor& operator=(const KeywordExtractor &);
+
+public:
+
+ class IndexPrefix // one node in a singly linked list of legal index name prefixes
+ {
+ private:
+ IndexPrefix(const IndexPrefix &); // copying is disabled: declared private, no definition in this header
+ IndexPrefix& operator=(const IndexPrefix &);
+
+ public:
+ char *_prefix; // strdup'ed copy of the prefix, released with free() in the destructor
+ int _prefixLen; // strlen() of the prefix
+ IndexPrefix *_next; // previous head of the list this node was linked into
+
+
+ IndexPrefix(const char *prefix,
+ IndexPrefix **list)
+ : _prefix(NULL),
+ _prefixLen(0),
+ _next(NULL)
+ {
+ _prefix = strdup(prefix);
+ assert(_prefix != NULL);
+ _prefixLen = strlen(prefix);
+ _next = *list; // push this node onto the front of the caller's list
+ *list = this;
+ }
+
+ ~IndexPrefix()
+ {
+ free(_prefix);
+ }
+
+ bool Match(const char *idxName) const
+ {
+ return (strncmp(idxName, _prefix, _prefixLen) == 0); // true when idxName starts with the stored prefix
+ }
+ };
+
+private:
+ typedef vespalib::hash_set<vespalib::string> Set;
+ IDocsumEnvironment *_env;
+ IndexPrefix *_legalPrefixes; // head of the linked list of legal prefixes
+ Set _legalIndexes; // exact legal index names
+
+
+ bool IsLegalIndexPrefix(const char *idxName) const
+ {
+ for (const IndexPrefix *pt = _legalPrefixes;
+ pt != NULL;
+ pt = pt->_next)
+ {
+ if (pt->Match(idxName))
+ return true;
+ }
+ return false;
+ }
+
+ bool IsLegalIndexName(const char *idxName) const
+ {
+ return _legalIndexes.find(idxName) != _legalIndexes.end();
+ }
+
+public:
+ explicit KeywordExtractor(IDocsumEnvironment * env);
+ ~KeywordExtractor();
+
+
+ /**
+ * Add a prefix to the set of legal index name prefixes. A new
+ * IndexPrefix node is heap-allocated and links itself into the
+ * internal prefix list.
+ * NOTE(review): the node is presumably deleted by the destructor,
+ * which is not defined in this header - confirm.
+ *
+ * @param prefix the index name prefix to add.
+ **/
+ void AddLegalIndexPrefix(const char *prefix)
+ {
+ // Self-registering construction: the constructor links the new node into _legalPrefixes
+ new IndexPrefix(prefix, &_legalPrefixes);
+ }
+
+
+ /**
+ * Add a name to the set of legal index names.
+ *
+ * @param idxName the index name to add.
+ **/
+ void AddLegalIndexName(const char *idxName)
+ {
+ _legalIndexes.insert(idxName);
+ }
+
+
+ /**
+ * Parse the input string as a ';' separated list of index names and
+ * index name prefixes. A '*' following a token in the list denotes
+ * that the token is an index name prefix. Add the index names and
+ * index name prefixes to the set of legal values.
+ *
+ * @param spec list of legal index names and prefixes.
+ **/
+ void AddLegalIndexSpec(const char *spec);
+
+
+ /**
+ * Create a spec on the same format as accepted by the @ref
+ * AddLegalIndexSpec method. The spec is returned by value as a
+ * vespalib::string, so the caller has nothing to free.
+ *
+ * @return spec defining legal index names and prefixes.
+ **/
+ vespalib::string GetLegalIndexSpec();
+
+
+ /**
+ * Determine whether the given index name is legal by checking it
+ * against the current set of legal index names and index name
+ * prefixes held by this object.
+ *
+ * @param idxName the index name to check.
+ * @param idxNameLen length of the index name.
+ * @return true if the given index name is legal.
+ **/
+ bool IsLegalIndex(const char *idxName, size_t idxNameLen) const;
+
+
+ /**
+ * Extract keywords from a stack dump of a SimpleQueryStack.
+ *
+ * The words are extracted as follows: For AND and OR operators, all
+ * TERM items occurring in a legal index (the set of legal indexes is
+ * defined by invoking the @ref AddLegalIndexName and @ref
+ * AddLegalIndexPrefix methods) are extracted.
+ *
+ * For PHRASE operators, the TERMS in a phrase are put together with
+ * space between them.
+ *
+ * @todo For NOT operators, only the first operand is considered.
+ *
+ * @param buf Reference to the simple query stack dump (data and length).
+ * @return Pointer to a malloc'ed buffer containing zero-terminated
+ * keywords, with an empty word at the end; NULL if the
+ * allocation failed. Presumably the caller releases it
+ * with free() - confirm against callers.
+ */
+ char *ExtractKeywords(const vespalib::stringref &buf) const;
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp
new file mode 100644
index 00000000000..6c47b305f09
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp
@@ -0,0 +1,337 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "positionsdfw.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.docsummary.positionsdfw");
+
+namespace search {
+namespace docsummary {
+
+using search::attribute::IAttributeContext;
+using search::attribute::IAttributeVector;
+using search::attribute::BasicType;
+using search::common::Location;
+
+/**
+ * Create a distance field writer for the attribute with the given
+ * name; the name is handed on unchanged to the AttrDFW base class.
+ *
+ * @param attrName name of the attribute holding the positions.
+ **/
+AbsDistanceDFW::AbsDistanceDFW(const vespalib::string & attrName)
+    : AttrDFW(attrName)
+{
+}
+
+/**
+ * Find the distance from the parsed query location held by the docsum
+ * state to the closest position stored for a document.
+ *
+ * Each attribute value is decoded with ZCurve::decode into an (x, y)
+ * pair. The x difference is scaled by the location's x aspect (a
+ * 32-bit fixed point factor) when that aspect is non-zero; the y
+ * difference is used as is.
+ *
+ * @param docid document to look up in the attribute.
+ * @param state docsum state; _parsedLocation must already be set.
+ * @return square root of the smallest squared distance, truncated to
+ *         an integer. When the document has no values the initial
+ *         maximum (sqrt of the largest int64_t) is returned.
+ **/
+uint64_t
+AbsDistanceDFW::findMinDistance(uint32_t docid,
+                                GetDocsumsState *state)
+{
+    search::common::Location &location = *state->_parsedLocation;
+    const IAttributeVector & attribute(vec(*state));
+
+    uint64_t absdist = std::numeric_limits<int64_t>::max();
+    int32_t docx = 0;
+    int32_t docy = 0;
+    // Start with room for 16 values; grow and re-read if the document has more.
+    std::vector<IAttributeVector::largeint_t> pos(16);
+    uint32_t numValues = attribute.get(docid, &pos[0], pos.size());
+    if (numValues > pos.size()) {
+        pos.resize(numValues);
+        numValues = attribute.get(docid, &pos[0], pos.size());
+        assert(numValues <= pos.size());
+    }
+    for (uint32_t i = 0; i < numValues; i++) {
+        int64_t docxy(pos[i]);
+        vespalib::geo::ZCurve::decode(docxy, &docx, &docy);
+
+        // Subtract in 64 bit: the difference of two 32-bit coordinates
+        // (e.g. against an INT_MIN y value) does not fit in int32_t and
+        // would be signed overflow. The absolute difference always fits
+        // in uint32_t, so the stored value is unchanged.
+        const int64_t diffx = static_cast<int64_t>(location.getX()) - static_cast<int64_t>(docx);
+        uint32_t dx = static_cast<uint32_t>(diffx < 0 ? -diffx : diffx);
+        if (location.getXAspect() != 0) {
+            dx = (static_cast<uint64_t>(dx) * location.getXAspect()) >> 32;
+        }
+
+        const int64_t diffy = static_cast<int64_t>(location.getY()) - static_cast<int64_t>(docy);
+        const uint32_t dy = static_cast<uint32_t>(diffy < 0 ? -diffy : diffy);
+
+        const uint64_t dist2 = dx * static_cast<uint64_t>(dx) +
+                               dy * static_cast<uint64_t>(dy);
+        if (dist2 < absdist) {
+            absdist = dist2;
+        }
+    }
+    return static_cast<uint64_t>(sqrt(static_cast<double>(absdist)));
+}
+
+/**
+ * Insert the distance to the closest position of a document into a
+ * slime structure.
+ *
+ * Nothing is inserted when the request carries no location string or
+ * when the parsed location reports a parse error. For RES_INT the
+ * distance is inserted as a long; for the string and data types the
+ * stringified distance is inserted; other types insert nothing.
+ *
+ * @param docid document to produce the field for.
+ * @param state docsum state holding the request args and parsed location.
+ * @param type result type of the target field.
+ * @param target inserter receiving the value.
+ **/
+void
+AbsDistanceDFW::insertField(uint32_t docid,
+                            GeneralResult *,
+                            GetDocsumsState *state,
+                            ResType type,
+                            vespalib::slime::Inserter &target)
+{
+    const vespalib::string &locationStr = state->_args.getLocation();
+    if (!(locationStr.size() > 0)) {
+        return;
+    }
+    // Parse the location lazily, the first time a writer needs it.
+    if (state->_parsedLocation.get() == NULL) {
+        state->_callback.ParseLocation(state);
+    }
+    assert(state->_parsedLocation.get() != NULL);
+    if (state->_parsedLocation->getParseError() != NULL) {
+        return;
+    }
+
+    uint64_t absdist = findMinDistance(docid, state);
+
+    if (type == RES_INT) {
+        target.insertLong(absdist);
+        return;
+    }
+
+    vespalib::string value = vespalib::stringify(absdist);
+    vespalib::slime::Memory data(value.c_str(), value.size());
+    switch (type) {
+    case RES_STRING:
+    case RES_LONG_STRING:
+    case RES_XMLSTRING:
+        target.insertString(data);
+        break;
+    case RES_LONG_DATA:
+    case RES_DATA:
+        target.insertData(data);
+        break;
+    default:
+        // remaining types get no value
+        break;
+    }
+}
+
+
+/**
+ * Append the distance to the closest position of a document to a raw
+ * docsum buffer.
+ *
+ * RES_INT fields are written as a 32-bit value (0 when no usable
+ * location is available). All other types are written as a length
+ * prefix (32 bit when the type is binary compatible with
+ * RES_LONG_STRING, otherwise 16 bit) followed by the distance as
+ * text; without a usable location only a zero length prefix is
+ * written.
+ *
+ * @param docid document to produce the field for.
+ * @param gres unused.
+ * @param state docsum state holding the request args and parsed location.
+ * @param type result type of the target field.
+ * @param target buffer to append to.
+ * @return number of bytes appended to the buffer.
+ **/
+uint32_t
+AbsDistanceDFW::WriteField(uint32_t docid,
+                           GeneralResult *gres,
+                           GetDocsumsState *state,
+                           ResType type,
+                           search::RawBuf *target)
+{
+    (void) gres;
+
+    // A distance can only be produced from a location that parsed cleanly.
+    bool haveLocation = false;
+    const vespalib::string &locationStr = state->_args.getLocation();
+    if (locationStr.size() > 0) {
+        if (state->_parsedLocation.get() == NULL) {
+            state->_callback.ParseLocation(state);
+        }
+        assert(state->_parsedLocation.get() != NULL);
+        haveLocation = (state->_parsedLocation->getParseError() == NULL);
+    }
+
+    uint64_t absdist = 0;
+    if (haveLocation) {
+        absdist = findMinDistance(docid, state);
+    }
+
+    if (type == RES_INT) {
+        uint32_t val32 = (uint32_t) absdist;
+        target->append(&val32, sizeof(val32));
+        return sizeof(val32);
+    }
+
+    // Variable size field: write a zero length first, patch it afterwards.
+    bool isLong = IsBinaryCompatible(type, RES_LONG_STRING);
+    uint16_t len16 = 0;
+    uint32_t len32 = 0;
+    int lenOffset = target->GetUsedLen();
+
+    if (isLong) {
+        target->append(&len32, sizeof(len32));
+    } else {
+        target->append(&len16, sizeof(len16));
+    }
+
+    uint32_t written = target->GetUsedLen() - lenOffset;
+    if (haveLocation) {
+        target->addNum64(absdist, 1, ' ');
+
+        // total bytes appended, length prefix included
+        written = target->GetUsedLen() - lenOffset;
+
+        // overwrite the placeholder with the real text length
+        if (isLong) {
+            len32 = written - sizeof(len32);
+            memcpy(target->GetWritableDrainPos(lenOffset), &len32,
+                   sizeof(len32));
+        } else {
+            len16 = written - sizeof(len16);
+            memcpy(target->GetWritableDrainPos(lenOffset), &len16,
+                   sizeof(len16));
+        }
+    }
+    return written;
+}
+
+//--------------------------------------------------------------------------
+
+/**
+ * Create a positions field writer for the attribute with the given
+ * name; the name is handed on unchanged to the AttrDFW base class.
+ *
+ * @param attrName name of the attribute holding the positions.
+ **/
+PositionsDFW::PositionsDFW(const vespalib::string & attrName)
+    : AttrDFW(attrName)
+{
+}
+
+/**
+ * Render all positions stored for a document as a sequence of
+ * <position .../> XML elements.
+ *
+ * Each value is decoded with ZCurve::decode; the pair x == 0,
+ * y == INT_MIN is treated as an empty value and skipped. Every
+ * element carries the raw x/y values and a "latlong" attribute where
+ * the coordinates are divided by 1000000 and prefixed with N/S and
+ * E/W. For types that are not binary compatible with RES_LONG_STRING
+ * output stops with an <overflow /> element once more than 30000
+ * characters have been produced.
+ *
+ * @param attribute attribute vector to read the positions from.
+ * @param docid document to format.
+ * @param type result type of the field being produced.
+ * @return stream holding the formatted text.
+ **/
+vespalib::asciistream
+PositionsDFW::formatField(const attribute::IAttributeVector & attribute, uint32_t docid, ResType type)
+{
+    vespalib::asciistream target;
+    int32_t docx = 0;
+    int32_t docy = 0;
+
+    // Start with room for 16 values; grow and re-read if the document has more.
+    std::vector<IAttributeVector::largeint_t> pos(16);
+    uint32_t numValues = attribute.get(docid, &pos[0], pos.size());
+    if (numValues > pos.size()) {
+        pos.resize(numValues);
+        numValues = attribute.get(docid, &pos[0], pos.size());
+        assert(numValues <= pos.size());
+    }
+    LOG(debug, "docid=%d, numValues=%d", docid, numValues);
+
+    const bool isShort = ! IsBinaryCompatible(type, RES_LONG_STRING);
+    for (uint32_t idx = 0; idx < numValues; idx++) {
+        int64_t docxy(pos[idx]);
+        vespalib::geo::ZCurve::decode(docxy, &docx, &docy);
+        if (docx == 0 && docy == INT_MIN) {
+            LOG(spam, "skipping empty zcurve value");
+            continue;
+        }
+        double degrees_ns = docy;
+        double degrees_ew = docx;
+        degrees_ns /= 1000000.0;
+        degrees_ew /= 1000000.0;
+
+        target << "<position x=\"" << docx << "\" y=\"" << docy << "\"";
+        target << " latlong=\"" << vespalib::FloatSpec::fixed;
+        // latitude: hemisphere letter followed by the magnitude
+        if (degrees_ns < 0) {
+            target << "S" << (-degrees_ns);
+        } else {
+            target << "N" << degrees_ns;
+        }
+        target << ";";
+        // longitude: hemisphere letter followed by the magnitude
+        if (degrees_ew < 0) {
+            target << "W" << (-degrees_ew);
+        } else {
+            target << "E" << degrees_ew;
+        }
+        target << "\" />";
+        if (isShort && target.size() > 30000) {
+            target << "<overflow />";
+            break;
+        }
+    }
+    return target;
+}
+
+
+/**
+ * Append the formatted positions of a document to a raw docsum
+ * buffer as a length-prefixed string.
+ *
+ * The length prefix is 32 bit when the type is binary compatible with
+ * RES_LONG_STRING and 16 bit otherwise.
+ *
+ * @param docid document to produce the field for.
+ * @param dsState docsum state used to look up the attribute vector.
+ * @param type result type of the target field.
+ * @param target buffer to append to.
+ * @return number of bytes appended to the buffer.
+ **/
+uint32_t
+PositionsDFW::WriteField(uint32_t docid,
+                         GeneralResult *,
+                         GetDocsumsState * dsState,
+                         ResType type,
+                         search::RawBuf *target)
+{
+    int startOffset = target->GetUsedLen();
+
+    vespalib::asciistream val(formatField(vec(*dsState), docid, type));
+
+    if (IsBinaryCompatible(type, RES_LONG_STRING)) {
+        uint32_t len32 = val.size();
+        target->append(&len32, sizeof(len32));
+        target->append(val.c_str(), len32);
+    } else {
+        uint16_t len16 = val.size();
+        target->append(&len16, sizeof(len16));
+        target->append(val.c_str(), len16);
+    }
+    // everything appended since startOffset belongs to this field
+    return target->GetUsedLen() - startOffset;
+}
+
+
+/**
+ * Insert the formatted positions of a document into a slime
+ * structure as a string, whatever the result type is.
+ *
+ * @param docid document to produce the field for.
+ * @param dsState docsum state used to look up the attribute vector.
+ * @param type result type, passed on to formatField.
+ * @param target inserter receiving the string.
+ **/
+void
+PositionsDFW::insertField(uint32_t docid,
+                          GeneralResult *,
+                          GetDocsumsState * dsState,
+                          ResType type,
+                          vespalib::slime::Inserter &target)
+{
+    vespalib::asciistream formatted(formatField(vec(*dsState), docid, type));
+    vespalib::slime::Memory text(formatted.c_str(), formatted.size());
+    target.insertString(text);
+}
+
+//--------------------------------------------------------------------------
+
+/**
+ * Create a PositionsDFW for the named attribute.
+ *
+ * When an attribute manager is given, the name must be non-NULL and
+ * the attribute must be obtainable through a context created by the
+ * manager; otherwise an empty pointer is returned. Without an
+ * attribute manager no checks are made.
+ * NOTE(review): in that case a NULL attribute_name is passed straight
+ * to the PositionsDFW constructor - confirm this is acceptable.
+ *
+ * @param attribute_name name of the attribute holding the positions.
+ * @param attribute_manager manager used for validation, may be NULL.
+ * @return the new writer, or an empty pointer if validation failed.
+ **/
+PositionsDFW::UP createPositionsDFW(const char *attribute_name,
+                                    IAttributeManager *attribute_manager)
+{
+    if (attribute_manager != NULL) {
+        if (!attribute_name) {
+            LOG(debug, "createPositionsDFW: missing attribute name '%p'", attribute_name);
+            return PositionsDFW::UP();
+        }
+        IAttributeContext::UP context = attribute_manager->createContext();
+        if (!context.get()) {
+            LOG(debug, "createPositionsDFW: could not create context from attribute manager");
+            return PositionsDFW::UP();
+        }
+        const IAttributeVector *attribute = context->getAttribute(attribute_name);
+        if (!attribute) {
+            LOG(debug, "createPositionsDFW: could not get attribute '%s' from context", attribute_name);
+            return PositionsDFW::UP();
+        }
+    }
+    PositionsDFW::UP writer;
+    writer.reset(new PositionsDFW(attribute_name));
+    return writer;
+}
+
+/**
+ * Create an AbsDistanceDFW for the named attribute.
+ *
+ * When an attribute manager is given, the name must be non-NULL and
+ * the attribute must be obtainable through a context created by the
+ * manager; otherwise an empty pointer is returned. Without an
+ * attribute manager no checks are made.
+ * NOTE(review): in that case a NULL attribute_name is passed straight
+ * to the AbsDistanceDFW constructor - confirm this is acceptable.
+ *
+ * @param attribute_name name of the attribute holding the positions.
+ * @param attribute_manager manager used for validation, may be NULL.
+ * @return the new writer, or an empty pointer if validation failed.
+ **/
+AbsDistanceDFW::UP createAbsDistanceDFW(const char *attribute_name,
+                                        IAttributeManager *attribute_manager)
+{
+    if (attribute_manager != NULL) {
+        if (!attribute_name) {
+            LOG(debug, "createAbsDistanceDFW: missing attribute name '%p'", attribute_name);
+            return AbsDistanceDFW::UP();
+        }
+        IAttributeContext::UP context = attribute_manager->createContext();
+        if (!context.get()) {
+            LOG(debug, "createAbsDistanceDFW: could not create context from attribute manager");
+            return AbsDistanceDFW::UP();
+        }
+        const IAttributeVector *attribute = context->getAttribute(attribute_name);
+        if (!attribute) {
+            LOG(debug, "createAbsDistanceDFW: could not get attribute '%s' from context", attribute_name);
+            return AbsDistanceDFW::UP();
+        }
+    }
+    AbsDistanceDFW::UP writer;
+    writer.reset(new AbsDistanceDFW(attribute_name));
+    return writer;
+}
+
+} // namespace docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h
new file mode 100644
index 00000000000..301bf2ceecc
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/attributedfw.h>
+
+namespace search {
+namespace docsummary {
+
+class AbsDistanceDFW : public AttrDFW // field writer producing the distance from the query location to a document's closest position
+{
+private:
+ uint64_t findMinDistance(uint32_t docid, GetDocsumsState *state); // shared helper for WriteField and insertField
+public:
+ AbsDistanceDFW(const vespalib::string & attrName); // NOTE(review): no UP typedef here; the UP used by createAbsDistanceDFW is presumably inherited - confirm
+
+ virtual bool IsGenerated() const { return true; } // always reports the field as generated
+ virtual uint32_t WriteField(uint32_t docid, // appends the field to a raw buffer, returns bytes written
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ search::RawBuf *target);
+ virtual void insertField(uint32_t docid, // inserts the field into a slime structure
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+};
+
+//--------------------------------------------------------------------------
+
+class PositionsDFW : public AttrDFW // field writer rendering a document's positions as <position .../> elements
+{
+private:
+ vespalib::asciistream formatField(const attribute::IAttributeVector & v, uint32_t docid, ResType type); // shared formatter for WriteField and insertField
+
+public:
+ typedef std::unique_ptr<PositionsDFW> UP;
+
+ PositionsDFW(const vespalib::string & attrName);
+
+ virtual bool IsGenerated() const { return true; } // always reports the field as generated
+ virtual uint32_t WriteField(uint32_t docid, // appends the field to a raw buffer, returns bytes written
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ search::RawBuf *target);
+ virtual void insertField(uint32_t docid, // inserts the field into a slime structure as a string
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+};
+
+PositionsDFW::UP createPositionsDFW(const char *attribute_name,
+ IAttributeManager *index_man);
+
+AbsDistanceDFW::UP createAbsDistanceDFW(const char *attribute_name,
+ IAttributeManager *index_man);
+
+} // namespace docsummary
+} // namespace search
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp
new file mode 100644
index 00000000000..8f5055f6d1d
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/common/featureset.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchsummary/docsummary/rankfeaturesdfw.h>
+#include <vespa/searchlib/common/feature.h>
+#include "docsumformat.h"
+
+LOG_SETUP(".searchlib.docsummary.rankfeaturesdfw");
+
+namespace search {
+namespace docsummary {
+
+RankFeaturesDFW::RankFeaturesDFW() : // starts without an environment; init() supplies it
+ _env(NULL)
+{
+}
+
+RankFeaturesDFW::~RankFeaturesDFW() // nothing to release: _env is not deleted here
+{
+}
+
+void
+RankFeaturesDFW::init(IDocsumEnvironment * env) // remember the environment later handed to FillRankFeatures
+{
+ _env = env;
+}
+
+/**
+ * Append the rank features of a document to a raw docsum buffer as a
+ * JSON object.
+ *
+ * The rank features are fetched through the state callback the first
+ * time they are needed. When no rank features are available, or none
+ * exist for the document, an empty field is written instead.
+ *
+ * @param docid document to produce the field for.
+ * @param gres unused.
+ * @param state docsum state holding the rank features and the JSON stringer.
+ * @param type result type of the target field.
+ * @param target buffer to append to.
+ * @return number of bytes appended to the buffer.
+ **/
+uint32_t
+RankFeaturesDFW::WriteField(uint32_t docid,
+                            GeneralResult * gres,
+                            GetDocsumsState * state,
+                            ResType type,
+                            search::RawBuf * target)
+{
+    (void) gres;
+
+    if (state->_rankFeatures.get() == NULL) {
+        state->_callback.FillRankFeatures(state, _env);
+        if (state->_rankFeatures.get() == NULL) { // still no rank features to write
+            return DocsumFormat::addEmpty(type, *target);
+        }
+    }
+
+    const FeatureSet::StringVector & names = state->_rankFeatures->getNames();
+    const feature_t * values = state->_rankFeatures->getFeaturesByDocId(docid);
+    if (values == NULL) {
+        // no features for this document
+        return DocsumFormat::addEmpty(type, *target);
+    }
+
+    // The stringer is shared through the state, so clear it before and after use.
+    vespalib::JSONStringer & json(state->_jsonStringer);
+    json.clear();
+    json.beginObject();
+    for (uint32_t featureIdx = 0; featureIdx < names.size(); ++featureIdx) {
+        featureDump(json, names[featureIdx], values[featureIdx]);
+    }
+    json.endObject();
+    uint32_t written = SummaryFeaturesDFW::writeString(json.toString(), type, target);
+    json.clear();
+    return written;
+}
+
+
+/**
+ * Insert the rank features of a document into a slime structure.
+ *
+ * The rank features are fetched through the state callback the first
+ * time they are needed. Nothing is inserted when no rank features are
+ * available or none exist for the document. For RES_FEATUREDATA an
+ * object with one double per feature is inserted. For the string and
+ * data types the features are rendered as JSON text; other types
+ * insert nothing.
+ *
+ * @param docid document to produce the field for.
+ * @param state docsum state holding the rank features and the JSON stringer.
+ * @param type result type of the target field.
+ * @param target inserter receiving the value.
+ **/
+void
+RankFeaturesDFW::insertField(uint32_t docid,
+                             GeneralResult *,
+                             GetDocsumsState *state,
+                             ResType type,
+                             vespalib::slime::Inserter &target)
+{
+    if (state->_rankFeatures.get() == NULL) {
+        state->_callback.FillRankFeatures(state, _env);
+        if (state->_rankFeatures.get() == NULL) { // still no rank features to write
+            return;
+        }
+    }
+    const FeatureSet::StringVector & names = state->_rankFeatures->getNames();
+    const feature_t * values = state->_rankFeatures->getFeaturesByDocId(docid);
+    if (values == NULL) {
+        // no features for this document: nothing to insert for any type
+        return;
+    }
+
+    if (type == RES_FEATUREDATA) {
+        vespalib::slime::Cursor& obj = target.insertObject();
+        for (uint32_t featureIdx = 0; featureIdx < names.size(); ++featureIdx) {
+            vespalib::slime::Memory name(names[featureIdx].c_str(), names[featureIdx].size());
+            obj.setDouble(name, values[featureIdx]);
+        }
+        return;
+    }
+
+    // The stringer is shared through the state, so clear it before and after use.
+    vespalib::JSONStringer & json(state->_jsonStringer);
+    json.clear();
+    json.beginObject();
+    for (uint32_t featureIdx = 0; featureIdx < names.size(); ++featureIdx) {
+        featureDump(json, names[featureIdx], values[featureIdx]);
+    }
+    json.endObject();
+    vespalib::slime::Memory value(json.toString().c_str(),
+                                  json.toString().size());
+    switch (type) {
+    case RES_STRING:
+    case RES_LONG_STRING:
+        target.insertString(value);
+        break;
+    case RES_DATA:
+    case RES_LONG_DATA:
+        target.insertData(value);
+        break;
+    default:
+        // remaining types get no value
+        break;
+    }
+    json.clear();
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h
new file mode 100644
index 00000000000..a04271a16c1
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vespa/searchsummary/docsummary/summaryfeaturesdfw.h>
+
+namespace search {
+namespace docsummary {
+
+class RankFeaturesDFW : public FeaturesDFW // field writer producing the rank features of a document
+{
+private:
+ RankFeaturesDFW(const RankFeaturesDFW &); // copying is disabled: declared private, no definition in this header
+ RankFeaturesDFW & operator=(const RankFeaturesDFW &);
+
+ IDocsumEnvironment * _env; // set by init(); passed to the FillRankFeatures callback
+
+public:
+ RankFeaturesDFW();
+ virtual ~RankFeaturesDFW();
+ void init(IDocsumEnvironment * env);
+ virtual bool IsGenerated() const { return true; } // always reports the field as generated
+ virtual uint32_t WriteField(uint32_t docid, // appends the field to a raw buffer, returns bytes written
+ GeneralResult * gres,
+ GetDocsumsState * state,
+ ResType type,
+ search::RawBuf * target);
+ virtual void insertField(uint32_t docid, // inserts the field into a slime structure
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
new file mode 100644
index 00000000000..19d8baccf45
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/fnet/frt/frt.h>
+#include <vespa/searchsummary/docsummary/resultclass.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+
+#include <zlib.h>
+
+LOG_SETUP(".searchlib.docsummary.resultclass");
+
+namespace search {
+namespace docsummary {
+
+ResultClass::ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum) // stores name and id, keeps a reference to the shared field enum
+ : _name(name),
+ _classID(id),
+ _entries(),
+ _nameMap(),
+ _fieldEnum(fieldEnum),
+ _enumMap(),
+ _dynInfo(NULL) // no dynamic info until setDynamicInfo() is called
+{
+}
+
+
+ResultClass::~ResultClass() // empty body: members clean up themselves, _dynInfo is not deleted
+{
+}
+
+
+/**
+ * Add a field with the given name and type to this result class.
+ *
+ * The field gets the next free entry index and its name is registered
+ * in the shared field name enum.
+ *
+ * @param name the name of the field to add.
+ * @param type the type of the field to add.
+ * @return false if a field with this name already exists, else true.
+ **/
+bool
+ResultClass::AddConfigEntry(const char *name, ResType type)
+{
+    const bool alreadyKnown = (_nameMap.find(name) != _nameMap.end());
+    if (alreadyKnown) {
+        return false;
+    }
+
+    // the new entry is appended, so its index is the current entry count
+    _nameMap[name] = _entries.size();
+
+    ResConfigEntry entry;
+    entry._type = type;
+    entry._bindname = name;
+    entry._enumValue = _fieldEnum.Add(name);
+    assert(entry._enumValue >= 0);
+    _entries.push_back(entry);
+    return true;
+}
+
+
+/**
+ * Build the mapping from field name enum value to entry index.
+ *
+ * The map gets one slot per value in the shared field name enum;
+ * slots for names that are not fields of this class hold -1.
+ **/
+void
+ResultClass::CreateEnumMap()
+{
+    // size the map to the enum and mark every slot as "not in this class"
+    _enumMap.assign(_fieldEnum.GetNumEntries(), -1);
+
+    for (uint32_t entryIdx(0); entryIdx < _entries.size(); entryIdx++) {
+        const ResConfigEntry &entry = _entries[entryIdx];
+        _enumMap[entry._enumValue] = entryIdx;
+    }
+}
+
+
+/**
+ * Copy the value of a variable size field into a buffer, inflating it
+ * first if it is stored compressed.
+ *
+ * The target buffer is reset before use. On success it holds the
+ * field value followed by a '\0' that is not counted in the used
+ * length. Fields that are not variable size leave the buffer empty
+ * and return true.
+ *
+ * A compressed field starts with a 32-bit uncompressed length
+ * followed by the zlib data. Extraction fails if the stored data is
+ * too short to hold that length, if the length is so large that room
+ * for the terminating '\0' cannot be expressed in 32 bits, if zlib
+ * reports an error, or if the inflated size differs from the stored
+ * length.
+ *
+ * @param target buffer receiving the field value.
+ * @return true on success, false if a compressed field was invalid.
+ **/
+bool
+ResEntry::_extract_field(search::RawBuf *target) const
+{
+    bool rc = true;
+    target->reset();
+
+    if (ResultConfig::IsVariableSize(_type)) {
+        if (_is_compressed()) { // COMPRESSED
+
+            uint32_t len = _get_length();
+            uint32_t realLen = 0;
+
+            if (len >= sizeof(uint32_t))
+                realLen = _get_real_length();
+            else
+                rc = false;
+
+            // Room needed for the data plus the terminating '\0'. This
+            // wraps to 0 for a stored length of 0xffffffff; reject that
+            // instead of handing zlib a destination size far larger
+            // than the 32000 bytes that would have been requested.
+            const uint32_t allocLen = realLen + 1;
+            if (allocLen == 0) {
+                rc = false;
+            } else if (realLen > 0) {
+                uLongf rlen = realLen;
+                char *fillPos = target->GetWritableFillPos(allocLen < 32000 ?
+                                                           32000 : allocLen);
+                if ((uncompress((Bytef *)fillPos, &rlen,
+                                (const Bytef *)(_get_compressed()),
+                                len - sizeof(realLen)) == Z_OK) &&
+                    rlen == realLen) {
+                    fillPos[realLen] = '\0';
+                    target->Fill(realLen);
+                } else {
+                    rc = false;
+                }
+            }
+        } else {                // UNCOMPRESSED
+            uint32_t len = _len;
+            if (len + 1 < 32000)
+                target->preAlloc(32000);
+            else
+                target->preAlloc(len + 1);
+            char *fillPos = target->GetWritableFillPos(len + 1 < 32000 ?
+                                                       32000 : len + 1);
+            memcpy(fillPos, _pt, len);
+            fillPos[len] = '\0';
+            target->Fill(len);
+        }
+    }
+    return rc;
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
new file mode 100644
index 00000000000..e35408a796c
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
@@ -0,0 +1,291 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/searchlib/util/stringenum.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * This enumeration contains values denoting the different types of
+ * docsum fields. NOTE: The internal implementation depends on RES_INT
+ * having the value 0. All types < RES_STRING must be fixed size and
+ * all types > RES_STRING must be variable size.
+ **/
+enum ResType {
+    RES_INT         = 0,  // fixed size; must keep the value 0
+    RES_SHORT       = 1,  // fixed size
+    RES_BYTE        = 2,  // fixed size
+    RES_FLOAT       = 3,  // fixed size
+    RES_DOUBLE      = 4,  // fixed size
+    RES_INT64       = 5,  // fixed size
+    RES_STRING      = 6,  // boundary between fixed and variable size types
+    RES_DATA        = 7,  // variable size
+    RES_LONG_STRING = 8,  // variable size
+    RES_LONG_DATA   = 9,  // variable size
+    RES_XMLSTRING   = 10, // variable size
+    RES_JSONSTRING  = 11, // variable size
+    RES_FEATUREDATA = 12  // variable size
+};
+
+
+/**
+ * This struct describes a single docsum field (name and type). A
+ * docsum blob is unpacked into an array of ResEntry instances
+ * by interpreting it as described by an array of ResConfigEntry
+ * instances.
+ **/
+struct ResConfigEntry {
+ ResType _type; // type of the field
+ vespalib::string _bindname; // name of the field
+ int _enumValue; // value assigned to the field name by the shared field name enum
+};
+
+
+/**
+ * This struct holds the actual value of a single docsum field. A
+ * docsum blob is unpacked into an array of ResEntry instances
+ * by interpreting it as described by an array of ResConfigEntry
+ * instances. Note that type normalization is performed when unpacking
+ * docsum fields. Fields of type RES_BYTE and RES_SHORT are promoted
+ * to RES_INT. Fields of type RES_FLOAT are promoted to RES_DOUBLE.
+ **/
+struct ResEntry
+{
+ ResType _type;
+ union { // numeric value, or length of the string/data value
+ uint32_t _intval;
+ uint32_t _stringlen;
+ uint32_t _datalen;
+ uint32_t _len; // top bit is the compression flag, lower 31 bits the stored length
+ uint64_t _int64val;
+ double _doubleval;
+ };
+ union { // pointer to the string/data value
+ char *_stringval;
+ char *_dataval;
+ void *_pt;
+ };
+
+ bool _extract_field(search::RawBuf *target) const;
+
+ uint32_t _get_length() const { return (_len & 0x7fffffff); } // stored length without the compression flag
+ bool _is_compressed() const { return (_len & 0x80000000) != 0; } // true when the top bit of _len is set
+ uint32_t _get_real_length() const
+ {
+ // precond: IsVariableSize(_type) && _len >= sizeof(uint32_t)
+
+ uint32_t rlen;
+ memcpy(&rlen, _pt, sizeof(rlen)); // the first 4 bytes of the stored data hold the uncompressed length
+ return rlen;
+ }
+ const void *_get_compressed() const
+ {
+ // precond: IsVariableSize(_type) && _len >= sizeof(uint32_t)
+
+ return (const void *)(((const char *) _pt) + sizeof(uint32_t)); // compressed bytes follow the 4-byte length
+ }
+ void _resolve_field(const char **buf, uint32_t *buflen,
+ search::RawBuf *target) const
+ {
+ // precond: IsVariableSize(_type)
+
+ if (_is_compressed()) {
+ if (_extract_field(target)) { // inflate into target and point the caller at it
+ *buf = target->GetDrainPos();
+ *buflen = target->GetUsedLen();
+ } else { // extraction failed: report no data
+ *buf = NULL;
+ *buflen = 0;
+ }
+ } else { // uncompressed: point directly at the stored data, target is left untouched
+ *buf = (char *) _pt;
+ *buflen = _len;
+ }
+ }
+};
+
+/**
+ * This class represents a specific docsum format (docsum class). It
+ * contains an array of ResConfigEntry instances (config
+ * entries) that may be used to unpack docsum blobs into
+ * ResEntry arrays. It also contains methods for mapping both
+ * field name and field name enum value into field index. The field
+ * index may then be used to access the actual field in the
+ * GeneralResult object representing the unpacked docsum blob.
+ **/
+class ResultClass
+{
+public:
+ struct DynamicInfo
+ {
+ uint32_t _overrideCnt; // # fields overridden
+ uint32_t _generateCnt; // # fields generated
+ };
+
+private:
+ ResultClass(const ResultClass &); // copying is disabled: declared private, no definition in this header
+ ResultClass& operator=(const ResultClass &);
+ typedef vespalib::hash_map<vespalib::string, int> NameIdMap;
+ typedef std::vector<ResConfigEntry> Configs;
+
+ vespalib::string _name; // name of this class
+ uint32_t _classID; // ID of this class
+ Configs _entries; // config entries for this result class
+ NameIdMap _nameMap; // fieldname -> entry index
+ util::StringEnum &_fieldEnum; // fieldname -> f.n. enum value [SHARED]
+ std::vector<int> _enumMap; // fieldname enum value -> entry index
+ DynamicInfo *_dynInfo; // fields overridden and generated
+
+public:
+ typedef std::unique_ptr<ResultClass> UP;
+
+ /**
+ * Constructor. Assign name and id to this result class. Also keep
+ * a reference to the shared string enum object.
+ *
+ * @param name the name of this result class.
+ * @param id the numeric id of this result class.
+ * @param fieldEnum shared object used to enumerate field names.
+ **/
+ ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum);
+
+ /**
+ * Destructor. The members release their own storage; attached
+ * dynamic field data is not deleted.
+ **/
+ ~ResultClass();
+
+
+ /**
+ * Attach dynamic field data to this result class. Only the pointer
+ * is stored; this class does not delete the data.
+ *
+ * @param data pointer to dynamic field data.
+ **/
+ void setDynamicInfo(DynamicInfo *data) { _dynInfo = data; }
+
+
+ /**
+ * Obtain pointer to dynamic field data attached to this result class.
+ *
+ * @return pointer to dynamic field data, NULL if none is attached.
+ **/
+ DynamicInfo *getDynamicInfo() const { return _dynInfo; }
+
+
+ /**
+ * Obtain the name of this result class.
+ *
+ * @return name of this result class.
+ **/
+ const char *GetClassName() const { return _name.c_str(); }
+
+
+ /**
+ * Obtain the numeric id of this result class.
+ *
+ * @return numeric id of this result class.
+ **/
+ uint32_t GetClassID() const { return _classID; }
+
+
+ /**
+ * Obtain the number of config entries (size of the
+ * ResConfigEntry array) held by this result class.
+ *
+ * @return number of config entries held by this object.
+ **/
+ uint32_t GetNumEntries() const { return _entries.size(); }
+
+
+ /**
+ * Add a config entry to this result class. Each config entry
+ * contains the name and type of a field present in the docsum blobs
+ * conforming to this result class. This method will fail if the
+ * field name given already has been used to name a field in this
+ * result class.
+ *
+ * @return true(success)/false(fail)
+ * @param name the name of the field to add.
+ * @param type the type of the field to add.
+ **/
+ bool AddConfigEntry(const char *name, ResType type);
+
+
+ /**
+ * This method may be called to create an internal mapping from
+ * field name enumerated value to field index. When building up a
+ * result configuration possibly containing several result classes,
+ * all field names are enumerated (across all result classes),
+ * assigning a single unique integer value to each field name. This
+ * is done with the StringEnum object given to the
+ * constructor. This way, fastserver components that want to
+ * reference a unique field name may use the enumerated value
+ * instead of the string itself. NOTE: This method must be called in
+ * order to use the GetIndexFromEnumValue method. NOTE2: This method
+ * is called by the ResultConfig::CreateEnumMaps method; no
+ * need to call it directly.
+ **/
+ void CreateEnumMap();
+
+
+ /**
+ * Obtain the field index from the field name. The field index may
+ * be used to look up a config entry in this object, or to look up a
+ * result entry in a GeneralResult object. NOTE: When using
+ * the return value from this method to look up a result entry in a
+ * GeneralResult object, make sure that the
+ * GeneralResult object has this object as its result
+ * class. NOTE2: This method is called by the
+ * GeneralResult::GetEntry(string) method; no need to call it
+ * directly.
+ *
+ * @param name the field name to look up.
+ * @return field index or -1 if not found.
+ **/
+ int GetIndexFromName(const char* name) const
+ {
+ NameIdMap::const_iterator found(_nameMap.find(name));
+ return (found != _nameMap.end()) ? found->second : -1;
+ }
+
+
+ /**
+ * Obtain the field index from the field name enumerated value. The
+ * field index may be used to look up a config entry in this object,
+ * or to look up a result entry in a GeneralResult
+ * object. NOTE: When using the return value from this method to
+ * look up a result entry in a GeneralResult object, make sure
+ * that the GeneralResult object has this object as its
+ * result class. NOTE2: This method is called by the
+ * GeneralResult::GetEntryFromEnumValue method; no need to
+ * call it directly. NOTE3: You need to call the CreateEnumMap
+ * method before calling this one.
+ *
+ * @param value the field name enum value to look up.
+ * @return field index or -1 if not found.
+ **/
+ int GetIndexFromEnumValue(uint32_t value) const
+ {
+ return (value < _enumMap.size()) ? _enumMap[value] : -1;
+ }
+
+
+ /**
+ * Obtain config entry by field index.
+ *
+ * @param offset the field index.
+ * @return config entry or NULL if not found.
+ **/
+ const ResConfigEntry *GetEntry(uint32_t offset) const
+ {
+ return (offset < _entries.size()) ? &_entries[offset] : NULL;
+ }
+};
+
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp
new file mode 100644
index 00000000000..de635d14854
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp
@@ -0,0 +1,246 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+LOG_SETUP(".searchlib.docsummary.resultconfig");
+
+namespace search {
+namespace docsummary {
+
+void
+ResultConfig::Clean() // Drop all result classes and the class name -> id index.
+{
+ _classLookup.clear();
+ _nameLookup.clear(); // NOTE(review): _fieldEnum is not cleared here although Reset() tests it - confirm this is intended
+}
+
+
+void
+ResultConfig::Init() // Currently a no-op; counterpart of Clean(), called by the constructor and by Reset().
+{
+}
+
+
+ResultConfig::ResultConfig()
+ : _defaultSummaryId(-1), // stored in a uint32_t, so this is the same value as NoClassID()
+ _classLookup(),
+ _nameLookup()
+{
+ Init();
+}
+
+
+ResultConfig::~ResultConfig()
+{
+ Clean(); // empties both lookup maps before the members themselves are destroyed
+}
+
+
+const char *
+ResultConfig::GetResTypeName(ResType type) // Map a result field type to its name; these are the same strings ReadConfig() accepts as field types.
+{
+ switch (type) {
+ case RES_INT: return "integer";
+ case RES_SHORT: return "short";
+ case RES_BYTE: return "byte";
+ case RES_FLOAT: return "float";
+ case RES_DOUBLE: return "double";
+ case RES_INT64: return "int64";
+ case RES_STRING: return "string";
+ case RES_DATA: return "data";
+ case RES_LONG_STRING: return "longstring";
+ case RES_LONG_DATA: return "longdata";
+ case RES_XMLSTRING: return "xmlstring";
+ case RES_JSONSTRING: return "jsonstring";
+ case RES_FEATUREDATA: return "featuredata";
+ }
+ return "unknown-type"; // only reached for a value that matches none of the cases above
+}
+
+void
+ResultConfig::Reset()
+{
+ const bool pristine = _classLookup.empty() && !(_fieldEnum.GetNumEntries() > 0);
+ if (pristine) { return; } // nothing registered yet: leave the object untouched
+ Clean();
+ Init();
+}
+
+
+ResultClass *
+ResultConfig::AddResultClass(const char *name, uint32_t id)
+{
+ // Refuse the reserved "no class" id and ids that are already taken.
+ if (id == NoClassID() || (_classLookup.find(id) != _classLookup.end())) {
+ return NULL;
+ }
+ ResultClass::UP created(new ResultClass(name, id, _fieldEnum));
+ ResultClass *raw = created.get();
+ _classLookup[id] = std::move(created);
+ if (_nameLookup.find(name) != _nameLookup.end()) {
+ LOG(warning, "Duplicate result class name: %s "
+ "(now maps to class id %u)", name, id);
+ }
+ _nameLookup[name] = id;
+ return raw;
+}
+
+
+const ResultClass*
+ResultConfig::LookupResultClass(uint32_t id) const
+{ // Find the result class registered under the given id; NULL when there is none.
+ IdMap::const_iterator pos = _classLookup.find(id);
+ return (pos == _classLookup.end()) ? NULL : pos->second.get();
+}
+
+uint32_t
+ResultConfig::LookupResultClassId(const vespalib::string &name, uint32_t def) const
+{ // Resolve a class name to its id; 'def' is returned for names that are not registered.
+ NameMap::const_iterator pos = _nameLookup.find(name);
+ return (pos == _nameLookup.end()) ? def : pos->second;
+}
+
+uint32_t
+ResultConfig::LookupResultClassId(const vespalib::string &name) const
+{ // Unregistered names fall back to the default summary id only when empty or "default"; otherwise NoClassID().
+ return LookupResultClassId(name, (!name.empty() && !(name == "default")) ? NoClassID() : _defaultSummaryId);
+}
+
+
+void
+ResultConfig::CreateEnumMaps()
+{ // Ask every registered result class to build its enum value -> field index map.
+ const IdMap::iterator last = _classLookup.end();
+ for (IdMap::iterator cur = _classLookup.begin(); cur != last; ++cur)
+ cur->second->CreateEnumMap();
+}
+
+
+bool
+ResultConfig::ReadConfig(const vespa::config::search::SummaryConfig &cfg, const char *configId) // Rebuild all result classes from cfg; configId only prefixes log messages.
+{
+ bool rc = true;
+ Reset(); // discard whatever was configured before
+ int maxclassID = 0x7fffffff; // avoid negative classids
+ _defaultSummaryId = cfg.defaultsummaryid;
+ for (uint32_t i = 0; rc && i < cfg.classes.size(); i++) {
+ if (cfg.classes[i].name.empty()) {
+ LOG(warning, "%s classes[%d]: empty name", configId, i); // warning only: the class is still added below
+ }
+ int classID = cfg.classes[i].id;
+ if (classID < 0 || classID > maxclassID) { // NOTE(review): the upper-bound test cannot trigger if int is 32 bits wide
+ LOG(error, "%s classes[%d]: bad id %d", configId, i, classID);
+ rc = false;
+ break;
+ }
+ ResultClass *resClass = AddResultClass(cfg.classes[i].name.c_str(), classID);
+ if (resClass == NULL) { // AddResultClass rejects ids that are already in use
+ LOG(error,
+ "%s: unable to add classes[%d] name %s",
+ configId, i, cfg.classes[i].name.c_str());
+ rc = false;
+ break;
+ }
+ for (unsigned int j = 0; rc && j < cfg.classes[i].fields.size(); j++) {
+ const char *fieldtype = cfg.classes[i].fields[j].type.c_str();
+ const char *fieldname = cfg.classes[i].fields[j].name.c_str();
+ LOG(debug, "Reconfiguring class '%s' field '%s' of type '%s'", cfg.classes[i].name.c_str(), fieldname, fieldtype);
+ if (strcmp(fieldtype, "integer") == 0) { // type names match the strings returned by GetResTypeName()
+ rc = resClass->AddConfigEntry(fieldname, RES_INT);
+ } else if (strcmp(fieldtype, "short") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_SHORT);
+ } else if (strcmp(fieldtype, "byte") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_BYTE);
+ } else if (strcmp(fieldtype, "float") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_FLOAT);
+ } else if (strcmp(fieldtype, "double") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_DOUBLE);
+ } else if (strcmp(fieldtype, "int64") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_INT64);
+ } else if (strcmp(fieldtype, "string") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_STRING);
+ } else if (strcmp(fieldtype, "data") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_DATA);
+ } else if (strcmp(fieldtype, "longstring") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_LONG_STRING);
+ } else if (strcmp(fieldtype, "longdata") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_LONG_DATA);
+ } else if (strcmp(fieldtype, "xmlstring") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_XMLSTRING);
+ } else if (strcmp(fieldtype, "jsonstring") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_JSONSTRING);
+ } else if (strcmp(fieldtype, "featuredata") == 0) {
+ rc = resClass->AddConfigEntry(fieldname, RES_FEATUREDATA);
+ } else { // FAIL: unknown field type
+ LOG(error,
+ "%s %s.fields[%d]: unknown type '%s'",
+ configId, cfg.classes[i].name.c_str(), j, fieldtype);
+ rc = false;
+ break;
+ }
+ if (!rc) { // FAIL: duplicate field name (any AddConfigEntry failure is reported this way)
+ LOG(error,
+ "%s %s.fields[%d]: duplicate name '%s'",
+ configId, cfg.classes[i].name.c_str(), j, fieldname);
+ break;
+ }
+ }
+ }
+ if (rc) {
+ CreateEnumMaps(); // create mappings needed by TVM
+ } else {
+ Reset(); // FAIL, discard all config (NOTE(review): _defaultSummaryId keeps the value read above)
+ }
+ return rc;
+}
+
+uint32_t
+ResultConfig::GetClassID(const char *buf, uint32_t buflen)
+{
+ // The class id is the leading 32-bit word of the blob, copied without byte swapping.
+ if (buflen < sizeof(uint32_t)) {
+ return NoClassID(); // blob too short to carry a class id
+ }
+
+ uint32_t classId;
+ memcpy(&classId, buf, sizeof(classId));
+ return classId;
+}
+
+urlresult*
+ResultConfig::Unpack(uint32_t partition, // Unpack a docsum blob; never returns NULL (a badurlresult is returned on any failure).
+ uint32_t docid,
+ HitRank metric,
+ const char *buf,
+ uint32_t buflen) const
+{
+ urlresult *ret = NULL;
+ const ResultClass *resClass = NULL;
+ uint32_t tmp32;
+
+ if (buflen >= sizeof(tmp32)) { // the blob starts with a 32-bit class id
+ memcpy(&tmp32, buf, sizeof(tmp32));
+ buf += sizeof(tmp32);
+ buflen -= sizeof(tmp32);
+ resClass = LookupResultClass(tmp32);
+ }
+
+ if (resClass != NULL && (buflen > 0)) { // known class and at least one byte of field data after the id
+ ret = new GeneralResult(resClass, partition, docid, metric);
+ if (ret->unpack(buf, buflen) != 0) { // FAIL: unpack
+ delete ret;
+ ret = NULL;
+ }
+ }
+
+ return (ret != NULL) ? ret : new badurlresult(partition, docid, metric); // allocated with new; presumably the caller deletes it - confirm
+}
+
+} // namespace docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h
new file mode 100644
index 00000000000..ed01dcdf6b3
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h
@@ -0,0 +1,301 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/config-summary.h>
+#include <vespa/fnet/frt/frt.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchlib/util/stringenum.h>
+#include <vespa/searchsummary/docsummary/resultclass.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * This class represents the overall result configuration. A result
+ * configuration may contain multiple result classes, where each
+ * result class represents a specific docsum blob format. The first
+ * 32 bits in the docsum blob define the id of a result
+ * class. The rest of the data contained in the docsum blob is then
+ * defined by the sequence of config entries held by the result class
+ * with the given id. Unpacking of docsum blobs is performed by first
+ * extracting the result class id and then using the appropriate
+ * result class to unpack the rest of the docsum fields. The
+ * extraction of the class id is done by the Unpack method in this
+ * class, while the unpacking of the docsum fields is done by a
+ * GeneralResult object backed by a ResultClass object.
+ **/
+class ResultConfig
+{
+private:
+ ResultConfig(const ResultConfig &); // private: objects of this class are not meant to be copied
+ ResultConfig& operator=(const ResultConfig &);
+
+ typedef vespalib::hash_map<vespalib::string, uint32_t> NameMap;
+ typedef vespalib::hash_map<uint32_t, ResultClass::UP> IdMap;
+ uint32_t _defaultSummaryId; // fallback class id for the empty and "default" class names
+ search::util::StringEnum _fieldEnum; // field name enumeration passed to every result class created here
+ IdMap _classLookup; // class id -> result class
+ NameMap _nameLookup; // name -> class id
+
+ void Clean();
+ void Init();
+
+public:
+ class iterator { // iterates over the result classes, hiding the underlying id -> class map
+ public:
+ iterator(IdMap::iterator it) : _it(it) { }
+ iterator operator ++(int) { iterator tmp(_it); ++_it; return tmp; }
+ iterator & operator ++() { ++_it; return *this; }
+ bool operator == (const iterator & b) const { return _it == b._it; }
+ bool operator != (const iterator & b) const { return _it != b._it; }
+ ResultClass & operator *() { return *_it->second; }
+ ResultClass * operator ->() { return _it->second.get(); }
+ private:
+ IdMap::iterator _it;
+ };
+
+ class const_iterator { // read-only counterpart of iterator
+ public:
+ const_iterator(IdMap::const_iterator it) : _it(it) { }
+ const_iterator operator ++(int) { const_iterator tmp(_it); ++_it; return tmp; }
+ const_iterator & operator ++() { ++_it; return *this; }
+ bool operator == (const const_iterator & b) const { return _it == b._it; }
+ bool operator != (const const_iterator & b) const { return _it != b._it; }
+ const ResultClass & operator *() const { return *_it->second; }
+ const ResultClass * operator ->() const { return _it->second.get(); }
+ private:
+ IdMap::const_iterator _it;
+ };
+
+ iterator begin() { return iterator(_classLookup.begin()); }
+ iterator end() { return iterator(_classLookup.end()); }
+ const_iterator begin() const { return const_iterator(_classLookup.begin()); }
+ const_iterator end() const { return const_iterator(_classLookup.end()); }
+
+ /**
+ * Constructor. Create an initially empty result configuration.
+ * NOTE: This method simply calls the Init method.
+ **/
+ ResultConfig();
+
+ /**
+ * Destructor. Delete all internal structures. NOTE: This method
+ * simply calls the Clean method.
+ **/
+ ~ResultConfig();
+
+
+ /**
+ * @return value denoting an undefined class id.
+ **/
+ static uint32_t NoClassID() { return static_cast<uint32_t>(-1); }
+
+
+ /**
+ * Determine if a result field type is of variable size.
+ *
+ * @return true for variable size field types, false for fixed
+ * size field types
+ **/
+ static bool IsVariableSize(ResType t) { return (t >= RES_STRING); } // NOTE(review): assumes ResType declares all variable size types from RES_STRING onwards - confirm
+
+
+ /**
+ * Determine if a pair of result field types are binary
+ * compatible. A pair of types are binary compatible if the packed
+ * representation is identical.
+ *
+ * @return true if the given types are binary compatible.
+ * @param a enum value of a result field type.
+ * @param b enum value of a result field type.
+ **/
+ static bool IsBinaryCompatible(ResType a, ResType b)
+ {
+ if (a == b) {
+ return true;
+ }
+ switch (a) {
+ case RES_STRING:
+ case RES_DATA:
+ return (b == RES_STRING || b == RES_DATA);
+ case RES_LONG_STRING:
+ case RES_LONG_DATA:
+ case RES_XMLSTRING:
+ case RES_FEATUREDATA:
+ case RES_JSONSTRING:
+ return (b == RES_LONG_STRING || b == RES_LONG_DATA ||
+ b == RES_XMLSTRING || b == RES_FEATUREDATA || b == RES_JSONSTRING);
+ default:
+ return false; // fixed size types are only compatible with themselves (handled by a == b above)
+ }
+ return false; // not reached: every path through the switch returns
+ }
+
+
+ /**
+ * Determine if a pair of result field types are runtime
+ * compatible. A pair of types are runtime compatible if the
+ * unpacked (@ref ResEntry) representation is identical.
+ *
+ * @return true if the given types are runtime compatible.
+ * @param a enum value of a result field type.
+ * @param b enum value of a result field type.
+ **/
+ static bool IsRuntimeCompatible(ResType a, ResType b)
+ {
+ switch (a) {
+ case RES_INT:
+ case RES_SHORT:
+ case RES_BYTE:
+ return (b == RES_INT || b == RES_SHORT || b == RES_BYTE);
+ case RES_FLOAT:
+ case RES_DOUBLE:
+ return (b == RES_FLOAT || b == RES_DOUBLE);
+ case RES_INT64:
+ return b == RES_INT64;
+ case RES_STRING:
+ case RES_LONG_STRING:
+ case RES_XMLSTRING:
+ case RES_JSONSTRING:
+ return (b == RES_STRING || b == RES_LONG_STRING || b == RES_XMLSTRING || b == RES_JSONSTRING);
+ case RES_DATA:
+ case RES_LONG_DATA:
+ return (b == RES_DATA || b == RES_LONG_DATA);
+ case RES_FEATUREDATA:
+ return (b == RES_FEATUREDATA);
+ }
+ return false; // only reached for a value matching none of the cases above
+ }
+
+
+ /**
+ * @return the name of the given result field type.
+ * @param type enum value of a result field type.
+ **/
+ static const char *GetResTypeName(ResType type);
+
+ /**
+ * Discard the current configuration and start over. After this
+ * method returns, the state of this object will be equal to the
+ * state right after it was created. This method may call both Clean
+ * and Init.
+ **/
+ void Reset();
+
+
+ /**
+ * Add a new result class to this result configuration. This will
+ * create a new result class object and insert it into the lookup
+ * structure. This method will fail if another class with the same
+ * ID has already been added or if the given ID is illegal.
+ *
+ * @return newly created result class object or NULL.
+ * @param name name of result class to add.
+ * @param classID id of result class to add.
+ **/
+ ResultClass *AddResultClass(const char *name, uint32_t classID);
+
+
+ /**
+ * Obtain result class from the result class id. This method is used
+ * when unpacking docsum blobs.
+ *
+ * @return result class with the given id or NULL if not found.
+ * @param classID the id of the result class to look up.
+ **/
+ const ResultClass *LookupResultClass(uint32_t classID) const;
+
+
+ /**
+ * Obtain result class id from the result class name.
+ *
+ * @return result class id or 'def' if not found
+ * @param name the name of the result class
+ * @param def default return value if not found
+ **/
+ uint32_t LookupResultClassId(const vespalib::string &name, uint32_t def) const;
+
+ /**
+ * Obtain result class id from the result class name.
+ *
+ * @return result class id; if no class has the given name, the configured default when the name is empty or "default", NoClassID() otherwise.
+ * @param name the name of the result class.
+ **/
+ uint32_t LookupResultClassId(const vespalib::string &name) const;
+
+
+ /**
+ * Obtain the number of result classes held by this result
+ * configuration.
+ *
+ * @return number of result classes.
+ **/
+ uint32_t GetNumResultClasses() const { return _classLookup.size(); }
+
+
+ /**
+ * Obtain the string enumeration object that holds the mapping from
+ * field name to field name enumerated value.
+ *
+ * @return field name enumeration.
+ **/
+ const search::util::StringEnum & GetFieldNameEnum() const { return _fieldEnum; }
+
+
+ /**
+ * This method calls the CreateEnumMap on all result classes held by
+ * this object. This is needed in order to look up fields by field
+ * name enumerated value.
+ **/
+ void CreateEnumMaps();
+
+ /**
+ * Read config that has been fetched from configserver. The current configuration is discarded first, and again if reading fails.
+ * @return true(success)/false(fail)
+ * @param cfg summary config to read.
+ * @param configId reference on server; only used to prefix log messages.
+ **/
+ bool ReadConfig(const vespa::config::search::SummaryConfig &cfg, const char *configId);
+
+ /**
+ * Inspect a docsum blob and return the class id of the docsum
+ * contained within it. This method is useful if you want to know
+ * what it is before deciding whether to unpack it.
+ *
+ * @return docsum blob class id, or NoClassID() if the blob is shorter than 32 bits.
+ * @param buf docsum blob.
+ * @param buflen length of docsum blob.
+ **/
+ uint32_t GetClassID(const char *buf, uint32_t buflen);
+
+ /**
+ * Unpack docsum blob. The first 32 bits are read from
+ * the data given and used to look up the appropriate result
+ * class. A GeneralResult object is created based on that
+ * class and told to unpack the rest of the docsum blob. If this
+ * operation succeeds, the GeneralResult object is
+ * returned. If it fails, a badurlresult object is returned
+ * instead.
+ *
+ * @return object representing the unpacked result.
+ * @param partition partition path for current hit.
+ * @param docid docid for current hit.
+ * @param metric relevance estimate for current hit.
+ * @param buf docsum blob.
+ * @param buflen length of docsum blob.
+ **/
+ urlresult *
+ Unpack(uint32_t partition,
+ uint32_t docid,
+ HitRank metric,
+ const char *buf,
+ uint32_t buflen) const;
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp
new file mode 100644
index 00000000000..5648702eb7f
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp
@@ -0,0 +1,266 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchcommon/common/undefinedvalues.h>
+#include <vespa/searchsummary/docsummary/resultpacker.h>
+#include <zlib.h>
+
+LOG_SETUP(".searchlib.docsummary.resultpacker");
+
+namespace search {
+namespace docsummary {
+
+void
+ResultPacker::WarnType(ResType type) // Report (at debug level) that a binary compatible but different type was added; requires _cfgEntry != NULL.
+{
+ LOG(debug,
+ "ResultPacker: got '%s', expected '%s' "
+ "(fields are binary compatible)",
+ GetResTypeName(type),
+ GetResTypeName(_cfgEntry->_type));
+}
+
+
+void
+ResultPacker::SetFormatError(ResType type)
+{
+ _error = true; // stays set until Init() succeeds again
+ const char *got = GetResTypeName(type);
+ if (_cfgEntry == NULL) { // no field is expected at this position
+ LOG(error,
+ "ResultPacker: format error: "
+ "got '%s', no more fields expected", got);
+ } else {
+ LOG(error,
+ "ResultPacker: format error: got '%s', expected '%s'",
+ got,
+ GetResTypeName(_cfgEntry->_type));
+ }
+}
+
+
+ResultPacker::ResultPacker(const ResultConfig *resConfig)
+ : _buf(32768),
+ _cbuf(32768),
+ _resConfig(resConfig), // not owned; may be NULL, in which case Init() always fails
+ _resClass(NULL),
+ _entryIdx(0),
+ _cfgEntry(NULL),
+ _error(true) // error state until Init() succeeds
+{
+}
+
+
+ResultPacker::~ResultPacker() // nothing to release explicitly; _resConfig is not owned
+{
+}
+
+void
+ResultPacker::InitPlain() // Start a blob without a result class: only the packing buffer is reset.
+{
+ _buf.reset(); // _resClass, _entryIdx, _cfgEntry and _error are left as they were
+}
+
+bool
+ResultPacker::Init(uint32_t classID)
+{
+ _buf.reset();
+ _entryIdx = 0;
+ _resClass = NULL;
+ if (_resConfig != NULL) {
+ _resClass = _resConfig->LookupResultClass(classID);
+ }
+ if (_resClass == NULL) { // unknown class (or no config): enter the error state
+ _cfgEntry = NULL;
+ _error = true;
+ LOG(error, "ResultPacker: resultclass %d does not exist", classID);
+ return false;
+ }
+ uint32_t id = _resClass->GetClassID();
+ _buf.append(&id, sizeof(id)); // every blob starts with its class id
+ _cfgEntry = _resClass->GetEntry(_entryIdx);
+ _error = false;
+ return true;
+}
+
+
+bool
+ResultPacker::AddEmpty() // Add an "undefined"/empty value of whatever type the current field expects.
+{
+ if (!_error && _cfgEntry != NULL) {
+ switch (_cfgEntry->_type) {
+ case RES_INT: return AddInteger(search::attribute::getUndefined<int32_t>());
+ case RES_SHORT: return AddShort(search::attribute::getUndefined<int16_t>());
+ case RES_BYTE: return AddByte(search::attribute::getUndefined<int8_t>());
+ case RES_FLOAT: return AddFloat(search::attribute::getUndefined<float>());
+ case RES_DOUBLE: return AddDouble(search::attribute::getUndefined<double>());
+ case RES_INT64: return AddInt64(search::attribute::getUndefined<int64_t>());
+ case RES_STRING: return AddString(NULL, 0);
+ case RES_DATA: return AddData(NULL, 0);
+ case RES_XMLSTRING: // the next three share the longstring case on purpose
+ case RES_JSONSTRING:
+ case RES_FEATUREDATA:
+ case RES_LONG_STRING: return AddLongString(NULL, 0);
+ case RES_LONG_DATA: return AddLongData(NULL, 0);
+ }
+ }
+ return AddInteger(0); // to provoke error condition
+}
+
+
+bool
+ResultPacker::AddByte(uint8_t value)
+{
+ const bool accepted = CheckEntry(RES_BYTE); // on success the field cursor has already advanced
+ if (accepted) { AddByteForce(value); }
+ return !_error;
+}
+
+void
+ResultPacker::AddByteForce(uint8_t value) // Append one byte without any type check or field cursor update.
+{
+ _buf.append(&value, sizeof(value));
+}
+
+bool
+ResultPacker::AddShort(uint16_t value)
+{
+ const bool accepted = CheckEntry(RES_SHORT); // on success the field cursor has already advanced
+ if (accepted) { AddShortForce(value); }
+ return !_error;
+}
+
+void
+ResultPacker::AddShortForce(uint16_t value) // Append a 16-bit value without any type check or field cursor update.
+{
+ _buf.append(&value, sizeof(value));
+}
+
+
+bool
+ResultPacker::AddInteger(uint32_t value)
+{
+ const bool accepted = CheckEntry(RES_INT); // on success the field cursor has already advanced
+ if (accepted) { AddIntegerForce(value); }
+ return !_error;
+}
+
+void
+ResultPacker::AddIntegerForce(uint32_t value) // Append a 32-bit value without any type check or field cursor update.
+{
+ _buf.append(&value, sizeof(value));
+}
+
+
+bool
+ResultPacker::AddFloat(float value)
+{
+ const bool accepted = CheckEntry(RES_FLOAT); // on success the field cursor has already advanced
+ if (accepted) { _buf.append(&value, sizeof(value)); }
+ return !_error;
+}
+
+
+bool
+ResultPacker::AddDouble(double value)
+{
+ const bool accepted = CheckEntry(RES_DOUBLE); // on success the field cursor has already advanced
+ if (accepted) { _buf.append(&value, sizeof(value)); }
+ return !_error;
+}
+
+
+bool
+ResultPacker::AddInt64(uint64_t value)
+{
+ const bool accepted = CheckEntry(RES_INT64); // on success the field cursor has already advanced
+ if (accepted) { _buf.append(&value, sizeof(value)); }
+ return !_error;
+}
+
+
+bool
+ResultPacker::AddString(const char *str, uint32_t slen)
+{
+ const bool accepted = CheckEntry(RES_STRING); // on success the field cursor has already advanced
+ if (accepted) { AddStringForce(str, slen); }
+ return !_error;
+}
+
+void
+ResultPacker::AddStringForce(const char *str, uint32_t slen) // Append a 16-bit length followed by the string bytes; no type check, no field cursor update.
+{
+ const uint16_t len = static_cast<uint16_t>((slen > 0xffffu) ? 0xffffu : slen); // clamp: a plain narrowing conversion wraps, so 65536 bytes would be packed as an empty string
+ _buf.append(&len, sizeof(len));
+ _buf.append(str, len); // oversized input keeps its first 65535 bytes
+}
+
+
+bool
+ResultPacker::AddData(const char *buf, uint32_t buflen) // Add a 'data' field: 16-bit length followed by the bytes. Returns false when in the error state.
+{
+ if (CheckEntry(RES_DATA)) {
+ const uint16_t len = static_cast<uint16_t>((buflen > 0xffffu) ? 0xffffu : buflen); // clamp: a plain narrowing conversion wraps, so 65536 bytes would be packed as empty data
+ _buf.append(&len, sizeof(len));
+ _buf.append(buf, len); // oversized input keeps its first 65535 bytes
+ }
+ return !_error;
+}
+
+
+bool
+ResultPacker::AddLongString(const char *str, uint32_t slen)
+{
+ if (!CheckEntry(RES_LONG_STRING))
+ return !_error;
+ _buf.append(&slen, sizeof(slen)); // 32-bit length, then the string bytes
+ _buf.append(str, slen);
+ return !_error;
+}
+
+
+bool
+ResultPacker::AddLongData(const char *buf, uint32_t buflen)
+{
+ if (!CheckEntry(RES_LONG_DATA))
+ return !_error;
+ _buf.append(&buflen, sizeof(buflen)); // 32-bit length, then the data bytes
+ _buf.append(buf, buflen);
+ return !_error;
+}
+
+
+bool
+ResultPacker::GetDocsumBlob(const char **buf, uint32_t *buflen)
+{
+ // A blob is complete only when every field of the result class was added.
+ if (!_error && _entryIdx != _resClass->GetNumEntries()) {
+ _error = true;
+ LOG(error,
+ "ResultPacker: format error: %d fields are missing",
+ _resClass->GetNumEntries() - _entryIdx);
+ }
+ // Hand out the packing buffer on success, an empty result otherwise.
+ const bool ok = !_error;
+ if (ok) {
+ *buf = _buf.GetDrainPos();
+ *buflen = _buf.GetUsedLen();
+ } else {
+ *buf = NULL;
+ *buflen = 0;
+ }
+ return ok;
+}
+
+void
+ResultPacker::GetDocsumBlobForce(const char **buf, uint32_t *buflen) // Return the packing buffer as-is, skipping the error and completeness checks of GetDocsumBlob().
+{
+ *buf = _buf.GetDrainPos();
+ *buflen = _buf.GetUsedLen();
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h
new file mode 100644
index 00000000000..25763fd06a8
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h
@@ -0,0 +1,271 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+
+namespace search {
+namespace docsummary {
+/**
+ * An object of this class may be used to create docsum blobs. A
+ * single blob is created by first indicating what result class the
+ * blob should conform to. After that, each docsum field is added with
+ * an individual method call. The blob may then be extracted by a
+ * final method call. Note that objects of this class may be re-used
+ * to create multiple blobs each.
+ **/
+class ResultPacker
+{
+private:
+ ResultPacker(const ResultPacker &); // private: objects of this class are not meant to be copied
+ ResultPacker& operator=(const ResultPacker &);
+
+ search::RawBuf _buf; // packing buffer
+ search::RawBuf _cbuf; // compression buffer
+ const ResultConfig *_resConfig; // result config
+ const ResultClass *_resClass; // result class of current blob
+ uint32_t _entryIdx; // current field index of current blob
+ const ResConfigEntry *_cfgEntry; // current field of current blob
+ bool _error; // error flag for current blob
+
+ static const char *GetResTypeName(ResType type)
+ { return ResultConfig::GetResTypeName(type); }
+
+ static bool IsBinaryCompatible(ResType a, ResType b)
+ { return ResultConfig::IsBinaryCompatible(a, b); }
+
+ void WarnType(ResType type);
+ void SetFormatError(ResType type);
+
+ bool CheckEntry(ResType type) // Verify that 'type' may be packed as the current field; advances to the next field on success.
+ {
+ if (_error) // the error state is sticky: nothing is accepted until Init() succeeds again
+ return false;
+
+ bool rc = (_cfgEntry != NULL &&
+ IsBinaryCompatible(_cfgEntry->_type, type));
+
+ if (rc) {
+ if (_cfgEntry->_type != type) { // compatible but not identical: accepted, reported by WarnType
+ WarnType(type);
+ }
+ _cfgEntry = _resClass->GetEntry(++_entryIdx);
+ } else {
+ SetFormatError(type); // wrong type, or no field expected at this position
+ }
+
+ return rc;
+ }
+
+public:
+ /**
+ * Create a result packer based on the given result config. Note
+ * that the result config object is NOT handed over; it is the
+ * responsibility of the application to ensure that the lifetime of
+ * the result config object is longer than the lifetime of the
+ * created result packer object.
+ *
+ * @param resConfig result configuration.
+ **/
+ explicit ResultPacker(const ResultConfig *resConfig);
+ ~ResultPacker();
+
+
+ /**
+ * Start creating new docsum blob without result class (bypassing type-checks).
+ * Only the packing buffer is reset; result class, field index and error flag are left untouched.
+ **/
+ void InitPlain();
+
+ /**
+ * Start creating a new docsum blob of the given result class.
+ *
+ * @return true(ok)/false(error).
+ * @param classID the id of the result class we want to create a
+ * docsum blob conforming to.
+ **/
+ bool Init(uint32_t classID);
+
+ /**
+ * Add empty field of appropriate type.
+ *
+ * @return true(ok)/false(error).
+ **/
+ bool AddEmpty();
+
+ /**
+ * Add a 'byte' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init.
+ *
+ * @return true(ok)/false(error).
+ * @param value byte value of field to add.
+ **/
+ bool AddByte(uint8_t value);
+
+ void AddByteForce(uint8_t value); // appends the value without any type check
+
+ /**
+ * Add a 'short' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init.
+ *
+ * @return true(ok)/false(error).
+ * @param value short value of field to add.
+ **/
+ bool AddShort(uint16_t value);
+
+ void AddShortForce(uint16_t value); // appends the value without any type check
+
+
+ /**
+ * Add a 'integer' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init.
+ *
+ * @return true(ok)/false(error).
+ * @param value integer value of field to add.
+ **/
+ bool AddInteger(uint32_t value);
+
+ void AddIntegerForce(uint32_t value); // appends the value without any type check
+
+
+ /**
+ * Add a 'float' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init.
+ *
+ * @return true(ok)/false(error).
+ * @param value float value of field to add.
+ **/
+ bool AddFloat(float value);
+
+
+ /**
+ * Add a 'double' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init.
+ *
+ * @return true(ok)/false(error).
+ * @param value double value of field to add.
+ **/
+ bool AddDouble(double value);
+
+
+ /**
+ * Add a 'int64' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init.
+ *
+ * @return true(ok)/false(error).
+ * @param value int64 value of field to add.
+ **/
+ bool AddInt64(uint64_t value);
+
+
+ /**
+ * Add a 'string' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init. The length is stored
+ * in 16 bits, which limits this field to 64kB.
+ *
+ * @return true(ok)/false(error).
+ * @param str pointer to string to add.
+ * @param slen length of string to add.
+ **/
+ bool AddString(const char *str, uint32_t slen);
+
+ void AddStringForce(const char *str, uint32_t slen); // appends length and bytes without any type check
+
+ /**
+ * Add a 'data' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init. The length is stored
+ * in 16 bits, which limits this field to 64kB.
+ *
+ * @return true(ok)/false(error).
+ * @param buf pointer to data to add.
+ * @param buflen length of data to add.
+ **/
+ bool AddData(const char *buf, uint32_t buflen);
+
+
+ /**
+ * Add a 'longstring' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init. The maximum length
+ * of this field is 2GB.
+ *
+ * @return true(ok)/false(error).
+ * @param str pointer to string to add.
+ * @param slen length of string to add.
+ **/
+ bool AddLongString(const char *str, uint32_t slen);
+
+
+ /**
+ * Add a 'longdata' field to the docsum blob we are currently
+ * creating. Note that this method will fail if the type of the
+ * added field is not compatible with the field type sequence
+ * defined in the result class config. This method will also fail if
+ * an error condition is already detected. The only way to clear the
+ * error state is with another call to @ref Init. The maximum length
+ * of this field is 2GB.
+ *
+ * @return true(ok)/false(error).
+ * @param buf pointer to data to add.
+ * @param buflen length of data to add.
+ **/
+ bool AddLongData(const char *buf, uint32_t buflen);
+
+
+ /**
+ * Obtain a pointer to, and the length of, the created docsum
+ * blob. This method will fail if an error was previously detected,
+ * or if any docsum fields were missing (too few fields were
+ * added). Note that calling the @ref Init method invalidates the
+ * obtained docsum blob.
+ *
+ * @return true(ok)/false(error).
+ * @param buf where to store the pointer to the docsum blob.
+ * @param buflen where to store the length of the docsum blob.
+ **/
+ bool GetDocsumBlob(const char **buf, uint32_t *buflen);
+
+ void GetDocsumBlobForce(const char **buf, uint32_t *buflen); // same as GetDocsumBlob, but without the error and completeness checks
+};
+
+}
+}
+
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp
new file mode 100644
index 00000000000..8b180dc3d78
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp
@@ -0,0 +1,160 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.docsummary.summaryfeaturesdfw");
+#include <vespa/searchlib/common/featureset.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchsummary/docsummary/docsumformat.h>
+#include "summaryfeaturesdfw.h"
+
+namespace search {
+namespace docsummary {
+
+
+/**
+ * Create a writer that has no docsum environment yet; init() attaches one.
+ **/
+SummaryFeaturesDFW::SummaryFeaturesDFW()
+    : _env(NULL)
+{ }
+
+/**
+ * Nothing to release; the environment pointer is only stored, never
+ * deleted, by this class.
+ **/
+SummaryFeaturesDFW::~SummaryFeaturesDFW() { }
+
+/**
+ * Remember the environment that is later handed to the
+ * FillSummaryFeatures() callback.
+ *
+ * @param env the docsum environment to use.
+ **/
+void
+SummaryFeaturesDFW::init(IDocsumEnvironment * env)
+{
+    _env = env;
+}
+
+// Key of the extra entry telling whether the summary features were cached
+// (written as 1.0 or 0.0): _G_cached is used with the JSON stringer and
+// _M_cached with the slime cursor.
+// NOTE(review): names starting with an underscore followed by an uppercase
+// letter are reserved identifiers in C++ -- consider renaming both.
+static vespalib::string _G_cached("vespa.summaryFeatures.cached");
+static vespalib::slime::Memory _M_cached("vespa.summaryFeatures.cached");
+
+/**
+ * Insert the summary features of document 'docid' into a slime target.
+ *
+ * The features are fetched through the FillSummaryFeatures() callback
+ * the first time they are needed. For RES_FEATUREDATA the features are
+ * inserted as a slime object with one double per feature; for the
+ * string and data types they are rendered as a JSON object and inserted
+ * as a string or data value. In both cases an extra
+ * "vespa.summaryFeatures.cached" entry (1.0 or 0.0) is added. Nothing is
+ * inserted when no features are available for the document.
+ *
+ * @param docid  document to look up features for.
+ * @param state  docsum state holding the features, callback and JSON stringer.
+ * @param type   result type of the field being produced.
+ * @param target slime inserter receiving the value.
+ **/
+void
+SummaryFeaturesDFW::insertField(uint32_t docid,
+                                GeneralResult *,
+                                GetDocsumsState *state,
+                                ResType type,
+                                vespalib::slime::Inserter &target)
+{
+    // Fetch the features on first use.
+    if (state->_summaryFeatures.get() == 0) {
+        state->_callback.FillSummaryFeatures(state, _env);
+    }
+    if (state->_summaryFeatures.get() == 0) {
+        return; // still no summary features to write
+    }
+
+    const FeatureSet::StringVector &featureNames = state->_summaryFeatures->getNames();
+    const feature_t *featureValues = state->_summaryFeatures->getFeaturesByDocId(docid);
+    if (featureValues == NULL) {
+        return; // no features for this document
+    }
+    const double cachedFlag = state->_summaryFeaturesCached ? 1.0 : 0.0;
+
+    if (type == RES_FEATUREDATA) {
+        // Structured output: one double per feature name.
+        vespalib::slime::Cursor &object = target.insertObject();
+        for (uint32_t idx = 0; idx < featureNames.size(); ++idx) {
+            vespalib::slime::Memory key(featureNames[idx].c_str(), featureNames[idx].size());
+            object.setDouble(key, featureValues[idx]);
+        }
+        object.setDouble(_M_cached, cachedFlag);
+        return;
+    }
+
+    // Textual output: render the features as a JSON object.
+    vespalib::JSONStringer &json(state->_jsonStringer);
+    json.clear();
+    json.beginObject();
+    for (uint32_t idx = 0; idx < featureNames.size(); ++idx) {
+        featureDump(json, featureNames[idx], featureValues[idx]);
+    }
+    json.appendKey(_G_cached);
+    json.appendDouble(cachedFlag);
+    json.endObject();
+
+    vespalib::slime::Memory rendered(json.toString().c_str(),
+                                     json.toString().size());
+    switch (type) {
+    case RES_STRING:
+    case RES_LONG_STRING:
+        target.insertString(rendered);
+        break;
+    case RES_DATA:
+    case RES_LONG_DATA:
+        target.insertData(rendered);
+        break;
+    default:
+        break; // other types: the rendered JSON is not inserted
+    }
+    json.clear();
+}
+
+/**
+ * Write the summary features of document 'docid' to a raw docsum buffer.
+ *
+ * The features are rendered as a JSON object (including the
+ * "vespa.summaryFeatures.cached" entry) and written with writeString().
+ * An empty field is written when no features are available.
+ *
+ * @return number of bytes written, as reported by the DocsumFormat helpers.
+ * @param docid  document to look up features for.
+ * @param state  docsum state holding the features, callback and JSON stringer.
+ * @param type   result type of the field being produced.
+ * @param target buffer to append to.
+ **/
+uint32_t
+SummaryFeaturesDFW::WriteField(uint32_t docid,
+                               GeneralResult *,
+                               GetDocsumsState * state,
+                               ResType type,
+                               search::RawBuf * target)
+{
+    // Fetch the features on first use.
+    if (state->_summaryFeatures.get() == 0) {
+        state->_callback.FillSummaryFeatures(state, _env);
+    }
+    if (state->_summaryFeatures.get() == 0) { // still no summary features to write
+        return DocsumFormat::addEmpty(type, *target);
+    }
+
+    const FeatureSet::StringVector &featureNames = state->_summaryFeatures->getNames();
+    vespalib::JSONStringer &json(state->_jsonStringer);
+    const feature_t *featureValues = state->_summaryFeatures->getFeaturesByDocId(docid);
+    if (featureValues == NULL) {
+        return DocsumFormat::addEmpty(type, *target);
+    }
+
+    json.clear();
+    json.beginObject();
+    for (uint32_t idx = 0; idx < featureNames.size(); ++idx) {
+        featureDump(json, featureNames[idx], featureValues[idx]);
+    }
+    json.appendKey(_G_cached);
+    json.appendDouble(state->_summaryFeaturesCached ? 1.0 : 0.0);
+    json.endObject();
+
+    const uint32_t written = writeString(json.toString(), type, target);
+    json.clear();
+    return written;
+}
+
+/**
+ * Append one feature to a JSON object under construction: 'name' is
+ * written as the key and 'feature' as its value. NaN and infinite
+ * values are written as null.
+ **/
+void FeaturesDFW::featureDump(vespalib::JSONStringer & json, const vespalib::stringref & name, double feature)
+{
+    json.appendKey(name);
+    const bool representable = !(std::isnan(feature) || std::isinf(feature));
+    if (representable) {
+        json.appendDouble(feature);
+    } else {
+        json.appendNull();
+    }
+}
+
+
+/**
+ * Append 'str' to 'target' in the encoding selected by 'type':
+ * short data for RES_STRING / RES_DATA, long data for RES_FEATUREDATA /
+ * RES_LONG_STRING / RES_LONG_DATA. Any other type is logged as an error
+ * and an empty field is written instead.
+ *
+ * @return the value returned by the DocsumFormat helper that was used.
+ **/
+uint32_t
+SummaryFeaturesDFW::writeString(const vespalib::stringref & str, ResType type, search::RawBuf * target)
+{
+    if (type == RES_STRING || type == RES_DATA) {
+        return DocsumFormat::addShortData(*target, str.c_str(), str.size());
+    }
+    if (type == RES_FEATUREDATA || type == RES_LONG_STRING || type == RES_LONG_DATA) {
+        return DocsumFormat::addLongData(*target, str.c_str(), str.size());
+    }
+    LOG(error, "unhandled type %u in writeString()", type);
+    return DocsumFormat::addEmpty(type, *target);
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h
new file mode 100644
index 00000000000..c9a6c5d9d9a
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/vespalib/util/jsonwriter.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Docsum field writer base class that provides the JSON feature dump
+ * helper; SummaryFeaturesDFW derives from it.
+ **/
+class FeaturesDFW : public IDocsumFieldWriter
+{
+protected:
+ // Append 'name' as a key and 'feature' as its value to 'json';
+ // NaN and infinite values are written as null.
+ void featureDump(vespalib::JSONStringer & json, const vespalib::stringref & name, double feature);
+};
+
+/**
+ * Docsum field writer producing the summary features of a document,
+ * either as a slime object / JSON text (insertField) or as a packed
+ * docsum field (WriteField).
+ **/
+class SummaryFeaturesDFW : public FeaturesDFW
+{
+private:
+ // Declared private; no definition appears in summaryfeaturesdfw.cpp,
+ // so copying is presumably meant to be disabled.
+ SummaryFeaturesDFW(const SummaryFeaturesDFW &);
+ SummaryFeaturesDFW & operator=(const SummaryFeaturesDFW &);
+
+ // Environment passed to the FillSummaryFeatures() callback; set by init().
+ IDocsumEnvironment * _env;
+
+public:
+ SummaryFeaturesDFW();
+ virtual ~SummaryFeaturesDFW();
+ // Store the environment to use when summary features must be fetched.
+ void init(IDocsumEnvironment * env);
+ virtual bool IsGenerated() const { return true; }
+ // Write the features as JSON text into 'target'; writes an empty field
+ // when no features are available.
+ virtual uint32_t WriteField(uint32_t docid,
+ GeneralResult * gres,
+ GetDocsumsState * state,
+ ResType type,
+ search::RawBuf * target);
+ // Insert the features into a slime target: an object for
+ // RES_FEATUREDATA, JSON text for the string and data types.
+ virtual void insertField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+
+ // Append 'str' to 'target' as short or long data depending on 'type'.
+ static uint32_t writeString(const vespalib::stringref & str, ResType type, search::RawBuf * target);
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.cpp
new file mode 100644
index 00000000000..dfb7b863133
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.cpp
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.docsummary.textextractordfw");
+#include "tokenizer.h"
+#include "textextractordfw.h"
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Create a writer with no input field resolved yet (enum value -1);
+ * init() looks up the real value.
+ **/
+TextExtractorDFW::TextExtractorDFW()
+    : _inputFieldEnum(-1)
+{ }
+
+/**
+ * Resolve the enum value of the input field in the result config.
+ *
+ * @return true if 'inputField' was found, false (after logging a
+ *         warning) if the lookup returned -1.
+ * @param fieldName  name of the field this writer produces (used in the warning only).
+ * @param inputField name of the docsum field to extract text from.
+ * @param config     result config used for the lookup.
+ **/
+bool
+TextExtractorDFW::init(const vespalib::string & fieldName, const vespalib::string & inputField, const ResultConfig & config)
+{
+    _inputFieldEnum = config.GetFieldNameEnum().Lookup(inputField.c_str());
+    const bool found = (_inputFieldEnum != -1);
+    if (!found) {
+        LOG(warning, "Did not find input field '%s' as part of the docsum fields when initializing writer for field '%s'",
+            inputField.c_str(), fieldName.c_str());
+    }
+    return found;
+}
+
+/**
+ * Insert the text of the input field as a slime string. The field is
+ * run through the Tokenizer and the text of every token is
+ * concatenated. An empty string is inserted (and a warning logged)
+ * when the input entry cannot be found.
+ *
+ * @param gres   unpacked docsum holding the input entry.
+ * @param state  docsum state providing the field space used to resolve the entry.
+ * @param target slime inserter receiving the string.
+ **/
+void
+TextExtractorDFW::insertField(uint32_t,
+                              GeneralResult *gres,
+                              GetDocsumsState *state,
+                              ResType,
+                              vespalib::slime::Inserter &target)
+{
+    vespalib::string text;
+    ResEntry * inputEntry = gres->GetEntryFromEnumValue(_inputFieldEnum);
+    if (inputEntry == NULL) {
+        LOG(warning, "Did not find input entry using field enum %d. Write an empty field", _inputFieldEnum);
+    } else {
+        const char * raw = NULL;
+        uint32_t rawLen = 0;
+        inputEntry->_resolve_field(&raw, &rawLen, &state->_docSumFieldSpace);
+        // extract the text, token by token
+        for (Tokenizer tokenizer(raw, rawLen); tokenizer.hasMoreTokens(); ) {
+            Tokenizer::Token token = tokenizer.getNextToken();
+            text.append(token.getText());
+        }
+    }
+    target.insertString(vespalib::slime::Memory(text.c_str(), text.size()));
+}
+
+/**
+ * Append the text of the input field to 'target' as a 32-bit length
+ * followed by the concatenated token texts. The length is written as a
+ * placeholder first and patched once the text has been appended. When
+ * the input entry cannot be found a warning is logged and a zero-length
+ * field is written.
+ *
+ * @return total number of bytes appended (length prefix included).
+ * @param gres   unpacked docsum holding the input entry.
+ * @param state  docsum state providing the field space used to resolve the entry.
+ * @param target buffer to append to.
+ **/
+uint32_t
+TextExtractorDFW::WriteField(uint32_t,
+                             GeneralResult * gres,
+                             GetDocsumsState * state,
+                             ResType,
+                             search::RawBuf * target)
+{
+    uint32_t textLen = 0;
+    const uint32_t start = target->GetUsedLen();
+    // reserve room for the text length
+    target->append(&textLen, sizeof(textLen));
+
+    ResEntry * inputEntry = gres->GetEntryFromEnumValue(_inputFieldEnum);
+    if (inputEntry == NULL) {
+        LOG(warning, "Did not find input entry using field enum %d. Write an empty field", _inputFieldEnum);
+    } else {
+        const char * raw = NULL;
+        uint32_t rawLen = 0;
+        inputEntry->_resolve_field(&raw, &rawLen, &state->_docSumFieldSpace);
+        // extract the text, token by token
+        for (Tokenizer tokenizer(raw, rawLen); tokenizer.hasMoreTokens(); ) {
+            Tokenizer::Token token = tokenizer.getNextToken();
+            target->append(token.getText().c_str(), token.getText().size());
+        }
+    }
+
+    // number of bytes appended by this call
+    const uint32_t written = target->GetUsedLen() - start;
+    // patch in the real text length over the placeholder
+    textLen = written - sizeof(textLen);
+    memcpy(target->GetWritableDrainPos(start), &textLen, sizeof(textLen));
+
+    return written;
+}
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.h b/searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.h
new file mode 100644
index 00000000000..f22d5b3daa4
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/textextractordfw.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * This is the docsum field writer used to extract the original text from a disk summary on the juniper format.
+ * The input field is run through the Tokenizer and the texts of all tokens are concatenated.
+ **/
+class TextExtractorDFW : public IDocsumFieldWriter
+{
+private:
+ // Declared private; no definition appears in textextractordfw.cpp,
+ // so copying is presumably meant to be disabled.
+ TextExtractorDFW(const TextExtractorDFW &);
+ TextExtractorDFW & operator=(const TextExtractorDFW &);
+
+ // Enum value of the input field in the result config; -1 until init() has found it.
+ int _inputFieldEnum;
+
+public:
+ TextExtractorDFW();
+ virtual ~TextExtractorDFW() {}
+ // Look up 'inputField' in 'config'; returns false (and logs a warning) if it is not found.
+ bool init(const vespalib::string & fieldName, const vespalib::string & inputField, const ResultConfig & config);
+ // Inherit doc
+ virtual bool IsGenerated() const { return false; }
+ // Inherit doc
+ virtual uint32_t WriteField(uint32_t docid,
+ GeneralResult * gres,
+ GetDocsumsState * state,
+ ResType type,
+ search::RawBuf * target);
+ // Insert the extracted text as a slime string (empty if the input entry is missing).
+ virtual void insertField(uint32_t docid,
+ GeneralResult *gres,
+ GetDocsumsState *state,
+ ResType type,
+ vespalib::slime::Inserter &target);
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/tokenizer.cpp b/searchsummary/src/vespa/searchsummary/docsummary/tokenizer.cpp
new file mode 100644
index 00000000000..61a0f8cdfdd
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/tokenizer.cpp
@@ -0,0 +1,112 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".summary.tokenizer");
+#include "tokenizer.h"
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Classify a character: word characters give WORD, terminal
+ * punctuation characters give PUNCTUATION, everything else NON_WORD.
+ * The classification is delegated to Fast_UnicodeUtil.
+ **/
+Tokenizer::Token::Type
+Tokenizer::getTokenType(ucs4_t ch) const
+{
+    if (Fast_UnicodeUtil::IsWordChar(ch)) {
+        return Token::WORD;
+    }
+    return Fast_UnicodeUtil::IsTerminalPunctuationChar(ch)
+        ? Token::PUNCTUATION
+        : Token::NON_WORD;
+}
+
+/**
+ * Set up a tokenizer over the 'len' bytes starting at 'buf'. There are
+ * tokens to fetch only if the buffer is non-empty.
+ **/
+Tokenizer::Tokenizer(const char * buf, size_t len)
+    : _pos(buf),
+      _begin(buf),
+      _end(buf + len),
+      _tokenBegin(buf),
+      _type(Token::NOT_DEF),
+      _hasMoreTokens(_pos < _end)
+{ }
+
+/**
+ * Restart tokenization on a new buffer, discarding all state from the
+ * previous one.
+ **/
+void
+Tokenizer::reset(const char * buf, size_t len)
+{
+    _begin = buf;
+    _end = buf + len;
+    _pos = _begin;
+    _tokenBegin = _begin;
+    _type = Token::NOT_DEF;
+    _hasMoreTokens = (_pos < _end);
+}
+
+/**
+ * Tell whether getNextToken() has more tokens to deliver.
+ **/
+bool
+Tokenizer::hasMoreTokens()
+{
+    return _hasMoreTokens;
+}
+
+/**
+ * Return the next token of the buffer and advance the tokenizer.
+ *
+ * Scanning starts at _pos and a token is returned when
+ *  - a unit separator (0x1F) is read (the separator is consumed but is
+ *    not part of any token),
+ *  - a character of a different type than the current token is read
+ *    (that character becomes the start of the next token), or
+ *  - the end of the buffer is reached (_hasMoreTokens is then cleared).
+ * Between the anchor 0xFFF9 and the terminator 0xFFFB the characters are
+ * not classified; the separator 0xFFFA splits the span into the token
+ * text and its stem.
+ *
+ * NOTE(review): the end-of-buffer return uses the three-argument Token
+ * constructor with the range textBegin.._pos, so the stemBegin/stemEnd
+ * collected for an annotation that ends exactly at the end of the
+ * buffer are not passed on and the text range runs past textEnd --
+ * verify that this is intended.
+ **/
+Tokenizer::Token
+Tokenizer::getNextToken()
+{
+ const char * textBegin = _tokenBegin;
+ const char * textEnd = _pos;
+ const char * stemBegin = NULL;
+ const char * stemEnd = NULL;
+ const char * next = _pos;
+ bool insideAnnotation = false;
+ for (; _pos < _end; ) {
+ ucs4_t ch;
+ // single-byte (ASCII) character: decode directly
+ if ((unsigned const char)*next < 0x80) {
+ ch = *next++;
+ if (ch == 0x1F) { // unit separator
+ Token t(textBegin, textEnd, stemBegin, stemEnd, _type);
+ _pos = next; // advance to next char
+ _tokenBegin = next; // the next token begins at the next char
+ _type = Token::NOT_DEF; // reset the token type
+ if (_pos == _end) { // this is the last token
+ _hasMoreTokens = false;
+ }
+ return t;
+ }
+ } else {
+ ch = Fast_UnicodeUtil::GetUTF8CharNonAscii(next); // updates next to the next utf8 character
+ if (ch == 0xFFF9) { // anchor
+ insideAnnotation = true;
+ textBegin = next; // the token text starts after the anchor
+ _type = Token::ANNOTATION;
+ }
+ }
+ if (!insideAnnotation) {
+ Token::Type tmpType = getTokenType(ch);
+ if (_type != Token::NOT_DEF && _type != tmpType) { // we found a new token type
+ Token t(textBegin, textEnd, stemBegin, stemEnd, _type);
+ _tokenBegin = _pos; // the next token begins at this char
+ _pos = next; // advance to next char
+ _type = tmpType; // remember the new token type
+ return t;
+ }
+ _type = tmpType;
+ textEnd = next; // advance to next char
+ } else { // inside annotation
+ if (ch == 0xFFFA) { // separator
+ textEnd = _pos; // the text ends before the separator
+ stemBegin = next; // the stem starts after the separator
+ } else if (ch == 0xFFFB && stemBegin != NULL) { // terminator
+ stemEnd = _pos; // the stem ends before the terminator
+ insideAnnotation = false;
+ }
+ }
+
+ _pos = next;
+ }
+ // the whole buffer has been consumed
+ LOG_ASSERT(_pos == _end);
+ _hasMoreTokens = false;
+ return Token(textBegin, _pos, _type); // return the last token
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/tokenizer.h b/searchsummary/src/vespa/searchsummary/docsummary/tokenizer.h
new file mode 100644
index 00000000000..efd07e16b68
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/tokenizer.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastlib/text/unicodeutil.h>
+#include "itokenizer.h"
+
+namespace search {
+namespace docsummary {
+
+/**
+ * This class is used to tokenize a UTF-8 text buffer into tokens of type
+ * WORD, NON_WORD, PUNCTUATION, and ANNOTATION.
+ *
+ * Functions in Fast_UnicodeUtil are used to determine word characters and terminal punctuation characters.
+ * The unit separator 0x1F is always treated as a token separator. The unit separator itself is not returned as a token.
+ * Interlinear annotation (0xFFF9 original 0xFFFA stemmed 0xFFFB) is used to specify the stemmed variant of a word.
+ * The annotation characters are not returned as part of a token.
+ *
+ * The tokenizer only keeps pointers into the buffer it is given; it does not copy it.
+ */
+class Tokenizer : public ITokenizer
+{
+private:
+ const char * _pos; // the current position in the input buffer
+ const char * _begin; // the begin of input buffer
+ const char * _end; // the end of the input buffer
+ const char * _tokenBegin; // the start of the next token
+ Token::Type _type; // the type of the current position
+ bool _hasMoreTokens; // do we have more tokens
+
+ // Classify a character as WORD, PUNCTUATION or NON_WORD.
+ Token::Type getTokenType(ucs4_t ch) const;
+
+public:
+ /**
+ * Creates a new tokenizer for the given UTF-8 text buffer.
+ */
+ Tokenizer(const char * buf, size_t len);
+
+ // Inherit doc
+ virtual void reset(const char * buf, size_t len);
+ // Size in bytes of the buffer given to the constructor or to reset().
+ virtual size_t getBufferSize() const { return _end - _begin; }
+ virtual bool hasMoreTokens();
+ virtual Token getNextToken();
+};
+
+}
+}
+
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp
new file mode 100644
index 00000000000..92ebe07d457
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp
@@ -0,0 +1,819 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchsummary/docsummary/urlresult.h>
+#include <vespa/searchsummary/docsummary/resultconfig.h>
+#include <zlib.h>
+
+LOG_SETUP(".searchlib.docsummary.urlresult");
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Store the partition, document id and rank metric identifying a hit.
+ **/
+urlresult::urlresult(uint32_t partition, uint32_t docid, HitRank metric)
+    : _partition(partition), _docid(docid), _metric(metric)
+{ }
+
+
+/**
+ * Virtual so that subclasses are destroyed correctly through a base pointer.
+ **/
+urlresult::~urlresult() { }
+
+
+/*===============================================================*/
+
+
+/**
+ * Create a bad result with partition, document id and metric all zero.
+ **/
+badurlresult::badurlresult() : urlresult(0, 0, 0) { }
+
+
+/**
+ * Create a bad result for the given hit.
+ **/
+badurlresult::badurlresult(uint32_t partition, uint32_t docid, HitRank metric)
+    : urlresult(partition, docid, metric)
+{ }
+
+
+badurlresult::~badurlresult() { }
+
+
+/**
+ * A bad result has nothing to unpack: the input is ignored, a warning
+ * is logged and 0 is returned.
+ **/
+int
+badurlresult::unpack(const char *, const size_t)
+{
+    LOG(warning, "badurlresult::unpack");
+    return 0;
+}
+
+
+/*===============================================================*/
+
+
+/**
+ * Allocate the entry table for the current result class.
+ *
+ * For a copying unpack ('inplace' false) the table and a scratch buffer
+ * of 'buflen' + 1 bytes are allocated as one chunk, with _buf/_bufEnd
+ * delimiting the scratch buffer. For an in-place unpack only the table
+ * is allocated and _buf/_bufEnd are NULL. The table is zero-filled.
+ * When the result class has no entries nothing is allocated and all
+ * members are cleared.
+ *
+ * The allocation size is computed in size_t: the 32-bit arithmetic used
+ * previously could wrap for large 'buflen' / entry counts and silently
+ * allocate a chunk smaller than the one that is written afterwards.
+ *
+ * @param buflen  size of the docsum blob that is about to be unpacked.
+ * @param inplace true if field values will point into the blob itself.
+ **/
+void
+GeneralResult::AllocEntries(uint32_t buflen, bool inplace)
+{
+    const uint32_t cnt = _resClass->GetNumEntries();
+
+    if (cnt == 0) {
+        _entrycnt = 0;
+        _entries  = NULL;
+        _buf      = NULL;
+        _bufEnd   = NULL;
+        return;
+    }
+
+    const size_t tableBytes = static_cast<size_t>(cnt) * sizeof(ResEntry);
+    const size_t needMem = inplace
+        ? tableBytes
+        : tableBytes + static_cast<size_t>(buflen) + 1;
+
+    _entrycnt = cnt;
+    _entries  = (ResEntry *) malloc(needMem);
+    assert(_entries != NULL);
+    if (inplace) {
+        _buf    = NULL;
+        _bufEnd = NULL;
+    } else {
+        // the scratch buffer follows directly after the entry table
+        _buf    = ((char *)_entries) + tableBytes;
+        _bufEnd = _buf + buflen + 1;
+    }
+    memset(_entries, 0, tableBytes);
+}
+
+
+/**
+ * Release the entry table and every separately allocated field value.
+ *
+ * Values of variable-size fields that do not point into the scratch
+ * buffer were allocated with new[] (decompressed fields) and are
+ * deleted one by one. The table itself, and the scratch buffer that
+ * shares its chunk, are released with free(). The members are left
+ * unchanged; callers reset them as needed.
+ **/
+void
+GeneralResult::FreeEntries()
+{
+    // (_buf == NULL) <=> (_inplace_unpack() || (cnt == 0))
+    const bool mayOwnValues = (_buf != NULL);
+
+    for (uint32_t idx = 0; mayOwnValues && idx < _entrycnt; ++idx) {
+        ResEntry &slot = _entries[idx];
+        if (!ResultConfig::IsVariableSize(slot._type)) {
+            continue;
+        }
+        if (InBuf(slot._stringval)) {
+            continue; // lives in the shared chunk, freed below
+        }
+        delete [] (slot._stringval);
+    }
+    free(_entries); // free '_entries'/'_buf' chunk
+}
+
+
+
+/**
+ * Create an empty result for the given result class; no entries exist
+ * until one of the unpack methods has been run.
+ **/
+GeneralResult::GeneralResult(const ResultClass *resClass,
+                             uint32_t partition, uint32_t docid,
+                             HitRank metric)
+    : urlresult(partition, docid, metric),
+      _resClass(resClass),
+      _entrycnt(0),
+      _entries(NULL),
+      _buf(NULL),
+      _bufEnd(NULL)
+{ }
+
+
+/**
+ * Release the entry table and any separately allocated field values.
+ **/
+GeneralResult::~GeneralResult()
+{
+    FreeEntries();
+}
+
+
+/**
+ * Look up an unpacked entry by its index in the result class.
+ *
+ * @return the entry, or NULL if 'idx' is outside the unpacked entries.
+ **/
+ResEntry *
+GeneralResult::GetEntry(uint32_t idx)
+{
+    if (idx >= _entrycnt) {
+        return NULL;
+    }
+    return &_entries[idx];
+}
+
+
+/**
+ * Look up an unpacked entry by field name.
+ *
+ * @return the entry, or NULL if the result class reports a negative
+ *         index for 'name' or the index is outside the unpacked entries.
+ **/
+ResEntry *
+GeneralResult::GetEntry(const char *name)
+{
+    const int idx = _resClass->GetIndexFromName(name);
+    if (idx < 0 || (uint32_t)idx >= _entrycnt) {
+        return NULL;
+    }
+    return &_entries[idx];
+}
+
+
+/**
+ * Look up an unpacked entry by the enum value of its field name.
+ *
+ * @return the entry, or NULL if the result class reports a negative
+ *         index for 'value' or the index is outside the unpacked entries.
+ **/
+ResEntry *
+GeneralResult::GetEntryFromEnumValue(uint32_t value)
+{
+    const int idx = _resClass->GetIndexFromEnumValue(value);
+    if (idx < 0 || (uint32_t)idx >= _entrycnt) {
+        return NULL;
+    }
+    return &_entries[idx];
+}
+
+
+namespace {
+
+/**
+ * Check that at least 'need' bytes remain in [p, ebuf). The sizes are
+ * compared directly instead of forming 'p + need', which for a hostile
+ * length field could point (or wrap) far outside the blob.
+ **/
+bool
+hasBytes(const char *p, const char *ebuf, size_t need)
+{
+    return need <= static_cast<size_t>(ebuf - p);
+}
+
+/**
+ * Log that the blob ended before the part described by 'what' could be
+ * read by GeneralResult::'func'. Always returns false so that the
+ * result can be used directly as the unpack status.
+ **/
+bool
+tooShort(const char *func, const char *what)
+{
+    LOG(debug, "GeneralResult::%s: %s > ebuf", func, what);
+    LOG(error, "Document summary too short, couldn't unpack");
+    return false;
+}
+
+/**
+ * Copy one fixed-size value from the blob into 'dst' and advance 'p'
+ * past it. Returns false, leaving 'p' and 'dst' untouched, if fewer
+ * than sizeof(T) bytes remain.
+ **/
+template <typename T>
+bool
+readFixed(const char *&p, const char *ebuf, T &dst)
+{
+    if (!hasBytes(p, ebuf, sizeof(T))) {
+        return false;
+    }
+    memcpy(&dst, p, sizeof(T));
+    p += sizeof(T);
+    return true;
+}
+
+/**
+ * Unpack a field of one of the fixed-size numeric types into 'slot',
+ * promoting short/byte to RES_INT and float to RES_DOUBLE. Any other
+ * type is reported as an unknown type. 'func' is the name of the
+ * calling method and is only used in log messages.
+ *
+ * @return true if the field was unpacked, false on error.
+ **/
+bool
+unpackFixedField(ResType type, ResEntry &slot,
+                 const char *&p, const char *ebuf, const char *func)
+{
+    switch (type) {
+
+    case RES_INT:
+        if (!readFixed(p, ebuf, slot._intval)) {
+            return tooShort(func, "p + sizeof(..._intval)");
+        }
+        slot._type = RES_INT;
+        return true;
+
+    case RES_SHORT: {
+        uint16_t shortval;
+        if (!readFixed(p, ebuf, shortval)) {
+            return tooShort(func, "p + sizeof(shortval)");
+        }
+        slot._intval = (uint32_t)shortval;
+        slot._type = RES_INT;      // type promotion
+        return true;
+    }
+
+    case RES_BYTE: {
+        uint8_t byteval;
+        if (!readFixed(p, ebuf, byteval)) {
+            return tooShort(func, "p + sizeof(byteval)");
+        }
+        slot._intval = (uint32_t)byteval;
+        slot._type = RES_INT;      // type promotion
+        return true;
+    }
+
+    case RES_FLOAT: {
+        float floatval;
+        if (!readFixed(p, ebuf, floatval)) {
+            return tooShort(func, "p + sizeof(floatval)");
+        }
+        slot._doubleval = (double)floatval;
+        slot._type = RES_DOUBLE;   // type promotion
+        return true;
+    }
+
+    case RES_DOUBLE:
+        if (!readFixed(p, ebuf, slot._doubleval)) {
+            return tooShort(func, "p + sizeof(..._doubleval)");
+        }
+        slot._type = RES_DOUBLE;
+        return true;
+
+    case RES_INT64:
+        if (!readFixed(p, ebuf, slot._int64val)) {
+            return tooShort(func, "p + sizeof(..._int64val)");
+        }
+        slot._type = RES_INT64;
+        return true;
+
+    default:
+        LOG(warning, "GeneralResult::%s: no such type:%d", func, type);
+        LOG(error, "Incorrect type in document summary, couldn't unpack");
+        return false;
+    }
+}
+
+/**
+ * Produce an owned, NUL-terminated copy of the payload of a 'long'
+ * (32-bit length) field.
+ *
+ * An uncompressed payload is copied to 'spare', a position inside the
+ * result's own buffer. A compressed payload starts with the 32-bit
+ * uncompressed length followed by zlib data; it is inflated into a
+ * new[]-allocated buffer. If the payload cannot be inflated a warning
+ * is logged and an empty value located at 'spare' is produced instead.
+ *
+ * The allocation size is computed in size_t: in 32-bit arithmetic
+ * 'realLen + 1' wraps to 0 for realLen == 0xFFFFFFFF, which made
+ * uncompress() write far outside a zero-sized allocation.
+ *
+ * @param p          start of the payload.
+ * @param len        payload length with the compression flag removed.
+ * @param compressed whether the compression flag was set.
+ * @param spare      where to put values that live in the result's buffer.
+ * @param fieldName  field name used in warnings.
+ * @param val        receives the pointer to the value.
+ * @param vlen       receives the length of the value.
+ **/
+template <typename LenT>
+void
+copyLongField(const char *p, uint32_t len, bool compressed, char *spare,
+              const char *fieldName, char *&val, LenT &vlen)
+{
+    if (!compressed) {
+        memcpy(spare, p, len);
+        spare[len] = '\0';
+        val = spare;
+        vlen = len;
+        return;
+    }
+
+    uint32_t realLen = 0;
+    if (len >= sizeof(realLen)) {
+        memcpy(&realLen, p, sizeof(realLen));
+    } else {
+        LOG(warning, "Cannot uncompress docsum field %s; docsum field meta-data incomplete",
+            fieldName);
+    }
+
+    if (realLen > 0) {
+        char *inflated = new char[static_cast<size_t>(realLen) + 1];
+        uLongf rlen = realLen;
+        if ((uncompress((Bytef *)inflated, &rlen,
+                        (const Bytef *)(p + sizeof(realLen)),
+                        len - sizeof(realLen)) == Z_OK) &&
+            rlen == realLen)
+        {
+            inflated[realLen] = '\0';
+            val = inflated;
+            vlen = realLen;
+            return;
+        }
+        LOG(warning, "Cannot uncompress docsum field %s; decompression error",
+            fieldName);
+        delete [] inflated;
+    }
+
+    // insert empty field if decompress failed
+    spare[0] = '\0';
+    val = spare;
+    vlen = 0;
+}
+
+}
+
+
+/**
+ * Unpack a docsum blob into this result, copying all field data.
+ *
+ * The blob is decoded field by field according to the result class.
+ * Numeric fields are promoted (short/byte to int, float to double) and
+ * the long string/data types are normalized to RES_STRING / RES_DATA.
+ * String and data values are copied into memory owned by this object
+ * and are NUL-terminated; compressed long fields are inflated. Any
+ * previously unpacked entries are released first.
+ *
+ * @return 0 on success, -1 if the blob is too short, too long or
+ *         contains a field of unknown type (the result is then empty).
+ * @param buf    the docsum blob.
+ * @param buflen length of the docsum blob.
+ **/
+int
+GeneralResult::unpack(const char *buf, const size_t buflen)
+{
+    const char *const func = "unpack";
+    bool        rc   = true;
+    const char *ebuf = buf + buflen; // Ref to first after buffer
+    const char *p    = buf;          // current position in buffer
+
+    if (_entries != NULL)
+        FreeEntries();
+
+    AllocEntries(buflen);
+
+    for (uint32_t i = 0; rc && i < _entrycnt; i++) {
+        const ResConfigEntry *entry = _resClass->GetEntry(i);
+        ResEntry &slot = _entries[i];
+
+        switch (entry->_type) {
+
+        case RES_STRING: {
+            uint16_t slen;
+            if (!readFixed(p, ebuf, slen)) {
+                rc = tooShort(func, "p + sizeof(slen)");
+            } else if (!hasBytes(p, ebuf, slen)) {
+                rc = tooShort(func, "p + slen");
+            } else {
+                slot._stringval = _buf + (p - buf);
+                memcpy(slot._stringval, p, slen);
+                slot._stringval[slen] = '\0';
+                slot._stringlen = slen;
+                slot._type = RES_STRING;
+                p += slen;
+            }
+            break;
+        }
+
+        case RES_DATA: {
+            uint16_t dlen;
+            if (!readFixed(p, ebuf, dlen)) {
+                rc = tooShort(func, "p + sizeof(dlen)");
+            } else if (!hasBytes(p, ebuf, dlen)) {
+                rc = tooShort(func, "p + dlen");
+            } else {
+                slot._dataval = _buf + (p - buf);
+                memcpy(slot._dataval, p, dlen);
+                slot._dataval[dlen] = '\0'; // just in case.
+                slot._datalen = dlen;
+                slot._type = RES_DATA;
+                p += dlen;
+            }
+            break;
+        }
+
+        case RES_XMLSTRING:
+        case RES_JSONSTRING:
+        case RES_FEATUREDATA:
+        case RES_LONG_STRING: {
+            uint32_t lslen;
+            if (!readFixed(p, ebuf, lslen)) {
+                rc = tooShort(func, "p + sizeof(lslen)");
+                break;
+            }
+            // the top bit of the length is the compression flag
+            const bool compressed = ((lslen & 0x80000000) != 0);
+            lslen &= 0x7fffffff;
+            if (!hasBytes(p, ebuf, lslen)) {
+                rc = tooShort(func, "p + lslen");
+                break;
+            }
+            copyLongField(p, lslen, compressed, _buf + (p - buf),
+                          entry->_bindname.c_str(),
+                          slot._stringval, slot._stringlen);
+            slot._type = RES_STRING; // type normalization
+            p += lslen;
+            break;
+        }
+
+        case RES_LONG_DATA: {
+            uint32_t ldlen;
+            if (!readFixed(p, ebuf, ldlen)) {
+                rc = tooShort(func, "p + sizeof(ldlen)");
+                break;
+            }
+            // the top bit of the length is the compression flag
+            const bool compressed = ((ldlen & 0x80000000) != 0);
+            ldlen &= 0x7fffffff;
+            if (!hasBytes(p, ebuf, ldlen)) {
+                rc = tooShort(func, "p + ldlen");
+                break;
+            }
+            copyLongField(p, ldlen, compressed, _buf + (p - buf),
+                          entry->_bindname.c_str(),
+                          slot._dataval, slot._datalen);
+            slot._type = RES_DATA; // type normalization
+            p += ldlen;
+            break;
+        }
+
+        default: // fixed-size numeric types and unknown types
+            rc = unpackFixedField(entry->_type, slot, p, ebuf, func);
+            break;
+        } // END -- switch (entry->_type) {
+    } // END -- for (uint32_t i = 0; rc && i < _entrycnt; i++) {
+
+    if (rc && p != ebuf) {
+        LOG(debug, "GeneralResult::unpack: p:%p != ebuf:%p", p, ebuf);
+        LOG(error, "Document summary too long, couldn't unpack.");
+        rc = false;
+    }
+
+    if (rc)
+        return 0; // SUCCESS
+
+    // clean up on failure
+    FreeEntries();
+    _entrycnt = 0;
+    _entries = NULL;
+    _buf = NULL;
+    _bufEnd = NULL;
+
+    return -1; // FAIL
+}
+
+
+/**
+ * Unpack a docsum blob into this result without copying field data.
+ *
+ * Numeric fields are decoded as in unpack(). String and data values
+ * point directly into 'buf', so they are not NUL-terminated and are
+ * only valid for as long as the blob is. Long fields are not inflated:
+ * the stored length keeps the compression flag in its top bit and the
+ * value points at the raw payload.
+ *
+ * @return true on success, false if the blob is too short, too long or
+ *         contains a field of unknown type (the result is then empty).
+ * @param buf    the docsum blob.
+ * @param buflen length of the docsum blob.
+ **/
+bool
+GeneralResult::_inplace_unpack(const char *buf, const size_t buflen)
+{
+    const char *const func = "_inplace_unpack";
+    bool        rc   = true;
+    const char *ebuf = buf + buflen; // Ref to first after buffer
+    const char *p    = buf;          // current position in buffer
+
+    if (_entries != NULL)
+        FreeEntries();
+
+    AllocEntries(buflen, true);
+
+    for (uint32_t i = 0; rc && i < _entrycnt; i++) {
+        const ResConfigEntry *entry = _resClass->GetEntry(i);
+        ResEntry &slot = _entries[i];
+
+        switch (entry->_type) {
+
+        case RES_STRING: {
+            uint16_t slen;
+            if (!readFixed(p, ebuf, slen)) {
+                rc = tooShort(func, "p + sizeof(slen)");
+            } else if (!hasBytes(p, ebuf, slen)) {
+                rc = tooShort(func, "p + slen");
+            } else {
+                slot._stringval = const_cast<char *>(p);
+                slot._stringlen = slen;
+                slot._type = RES_STRING;
+                p += slen;
+            }
+            break;
+        }
+
+        case RES_DATA: {
+            uint16_t dlen;
+            if (!readFixed(p, ebuf, dlen)) {
+                rc = tooShort(func, "p + sizeof(dlen)");
+            } else if (!hasBytes(p, ebuf, dlen)) {
+                rc = tooShort(func, "p + dlen");
+            } else {
+                slot._dataval = const_cast<char *>(p);
+                slot._datalen = dlen;
+                slot._type = RES_DATA;
+                p += dlen;
+            }
+            break;
+        }
+
+        case RES_XMLSTRING:
+        case RES_JSONSTRING:
+        case RES_FEATUREDATA:
+        case RES_LONG_STRING: {
+            uint32_t flen;
+            if (!readFixed(p, ebuf, flen)) {
+                rc = tooShort(func, "p + sizeof(lslen)");
+                break;
+            }
+            const uint32_t lslen = flen & 0x7fffffff;
+            if (!hasBytes(p, ebuf, lslen)) {
+                rc = tooShort(func, "p + lslen");
+                break;
+            }
+            slot._stringval = const_cast<char *>(p);
+            slot._stringlen = flen;  // with compression flag
+            slot._type = RES_STRING; // type normalization
+            p += lslen;
+            break;
+        }
+
+        case RES_LONG_DATA: {
+            uint32_t flen;
+            if (!readFixed(p, ebuf, flen)) {
+                rc = tooShort(func, "p + sizeof(ldlen)");
+                break;
+            }
+            const uint32_t ldlen = flen & 0x7fffffff;
+            if (!hasBytes(p, ebuf, ldlen)) {
+                rc = tooShort(func, "p + ldlen");
+                break;
+            }
+            slot._dataval = const_cast<char *>(p);
+            slot._datalen = flen;  // with compression flag
+            slot._type = RES_DATA; // type normalization
+            p += ldlen;
+            break;
+        }
+
+        default: // fixed-size numeric types and unknown types
+            rc = unpackFixedField(entry->_type, slot, p, ebuf, func);
+            break;
+        } // END -- switch (entry->_type) {
+    } // END -- for (uint32_t i = 0; rc && i < _entrycnt; i++) {
+
+    if (rc && p != ebuf) {
+        LOG(debug, "GeneralResult::_inplace_unpack: p:%p != ebuf:%p", p, ebuf);
+        LOG(error, "Document summary too long, couldn't unpack.");
+        rc = false;
+    }
+
+    if (rc)
+        return true; // SUCCESS
+
+    // clean up on failure
+    FreeEntries();
+    _entrycnt = 0;
+    _entries = NULL;
+    _buf = NULL;
+    _bufEnd = NULL;
+
+    return false; // FAIL
+}
+
+}
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.h b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.h
new file mode 100644
index 00000000000..e882a5a9ed8
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.h
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/resultclass.h>
+#include <vespa/searchsummary/docsummary/docsumstorevalue.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * Abstract base class for a document summary result.
+ *
+ * Carries the partition, document id and rank metric handed to the
+ * constructor and exposes them through read-only accessors. Concrete
+ * subclasses must implement unpack().
+ */
+class urlresult
+{
+protected:
+    uint32_t _partition;   // value reported by GetPartition()
+    uint32_t _docid;       // value reported by GetDocID()
+    HitRank  _metric;      // value reported by GetMetric()
+
+public:
+    urlresult(uint32_t partition, uint32_t docid, HitRank metric);
+    virtual ~urlresult();
+
+    // Plain read-only accessors for the identifying fields.
+    uint32_t GetPartition() const
+    {
+        return _partition;
+    }
+    uint32_t GetDocID() const
+    {
+        return _docid;
+    }
+    HitRank GetMetric() const
+    {
+        return _metric;
+    }
+
+    // Type discriminator: the base class answers false; subclasses
+    // may override it.
+    virtual bool IsGeneral() const
+    {
+        return false;
+    }
+
+    // Unpack a packed summary of buflen bytes starting at buf.
+    // NOTE(review): the meaning of the int return value is defined by
+    // the implementations, which are not visible in this header.
+    virtual int unpack(const char *buf, const size_t buflen) = 0;
+};
+
+
+/**
+ * Concrete urlresult variant. It does not override IsGeneral(), so it
+ * reports false there, and it supplies its own unpack().
+ *
+ * NOTE(review): presumably stands in for a summary that could not be
+ * produced or decoded -- confirm against the implementation file.
+ */
+class badurlresult : public urlresult
+{
+public:
+    // Default construction; the values given to the base class are
+    // chosen in the out-of-line definition.
+    badurlresult();
+
+    // Construction with explicit partition, document id and metric.
+    badurlresult(uint32_t partition, uint32_t docid, HitRank metric);
+
+    virtual ~badurlresult();
+
+    // Implements the pure virtual urlresult::unpack().
+    virtual int unpack(const char *buf, const size_t buflen);
+};
+
+
+/**
+ * Document summary result bound to a ResultClass.
+ *
+ * Holds an array of ResEntry objects (_entries, _entrycnt) together
+ * with a character buffer delimited by [_buf, _bufEnd). Entries can be
+ * looked up by index, by name or by enum value. IsGeneral() reports
+ * true for this class.
+ */
+class GeneralResult : public urlresult
+{
+private:
+    // Copy construction and copy assignment are declared private so
+    // that outside code cannot copy a GeneralResult.
+    GeneralResult(const GeneralResult &);
+    GeneralResult& operator=(const GeneralResult &);
+
+    const ResultClass *_resClass;
+    uint32_t           _entrycnt;
+    ResEntry          *_entries;
+    char              *_buf;     // allocated in same chunk as _entries
+    char              *_bufEnd;  // first byte after _buf
+
+    /**
+     * Tell whether a pointer refers to a byte inside the internal
+     * buffer, i.e. lies in the half-open range [_buf, _bufEnd).
+     *
+     * The check neither modifies the object nor the pointee, so the
+     * function is const and accepts a pointer to const.
+     *
+     * @param pt pointer to test
+     * @return true if pt lies inside the buffer, false otherwise
+     */
+    bool InBuf(const void *pt) const
+    {
+        const char *addr = static_cast<const char *>(pt);
+        return (addr >= _buf) && (addr < _bufEnd);
+    }
+
+    void AllocEntries(uint32_t buflen, bool inplace = false);
+    void FreeEntries();
+
+    // Unpacks the buflen bytes at buf into _entries; returns true on
+    // success and false on failure.
+    bool _inplace_unpack(const char *buf, const size_t buflen);
+
+public:
+    GeneralResult(const ResultClass *resClass, uint32_t partition,
+                  uint32_t docid, HitRank metric);
+    ~GeneralResult();
+
+    const ResultClass *GetClass() const { return _resClass; }
+
+    // Entry lookup by position, by name, or by enum value.
+    ResEntry *GetEntry(uint32_t idx);
+    ResEntry *GetEntry(const char *name);
+    ResEntry *GetEntryFromEnumValue(uint32_t val);
+
+    virtual bool IsGeneral() const { return true; }
+    virtual int unpack(const char *buf, const size_t buflen);
+
+    /**
+     * Unpack the fields of a docsum store value.
+     *
+     * @param value the stored document summary
+     * @return false if value is not valid; otherwise the result of
+     *         _inplace_unpack() on the value's field data
+     */
+    bool inplaceUnpack(const DocsumStoreValue &value) {
+        // && short-circuits: the field accessors are only consulted
+        // for a valid value.
+        return value.valid() &&
+               _inplace_unpack(value.fieldsPt(), value.fieldsSz());
+    }
+};
+
+}
+}
+
+
diff --git a/searchsummary/testrun/.gitignore b/searchsummary/testrun/.gitignore
new file mode 100644
index 00000000000..559f57dccbe
--- /dev/null
+++ b/searchsummary/testrun/.gitignore
@@ -0,0 +1,9 @@
+test-report.html
+test-report.html.*
+test.*.*.desc
+test.*.*.file.*
+test.*.*.files.html
+test.*.*.log
+tmp.*
+/test.*.*.result
+Makefile