summaryrefslogtreecommitdiffstats
path: root/streamingvisitors
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /streamingvisitors
Publish
Diffstat (limited to 'streamingvisitors')
-rw-r--r--streamingvisitors/.gitignore3
-rw-r--r--streamingvisitors/CMakeLists.txt25
-rw-r--r--streamingvisitors/OWNERS2
-rw-r--r--streamingvisitors/doc/SearchVisitorProtocol.html93
-rw-r--r--streamingvisitors/src/.gitignore6
-rw-r--r--streamingvisitors/src/testlist.txt3
-rw-r--r--streamingvisitors/src/tests/hitcollector/.gitignore4
-rw-r--r--streamingvisitors/src/tests/hitcollector/CMakeLists.txt8
-rw-r--r--streamingvisitors/src/tests/hitcollector/DESC1
-rw-r--r--streamingvisitors/src/tests/hitcollector/FILES1
-rw-r--r--streamingvisitors/src/tests/hitcollector/hitcollector.cpp314
-rw-r--r--streamingvisitors/src/tests/querywrapper/.gitignore4
-rw-r--r--streamingvisitors/src/tests/querywrapper/CMakeLists.txt8
-rw-r--r--streamingvisitors/src/tests/querywrapper/DESC1
-rw-r--r--streamingvisitors/src/tests/querywrapper/FILES1
-rw-r--r--streamingvisitors/src/tests/querywrapper/querywrapper.cpp108
-rw-r--r--streamingvisitors/src/tests/searchvisitor/.gitignore4
-rw-r--r--streamingvisitors/src/tests/searchvisitor/CMakeLists.txt8
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/documenttypes.cfg317
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.aaa.cfg11
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.simple.cfg0
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.aaa.cfg43
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.simple.cfg0
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/summary.aaa.cfg30
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/summary.simple.cfg0
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/summarymap.aaa.cfg11
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/summarymap.simple.cfg0
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.aaa.cfg98
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.simple.cfg0
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.aaa.cfg26
-rw-r--r--streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.simple.cfg0
-rw-r--r--streamingvisitors/src/tests/searchvisitor/searchvisitor.cpp123
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/.gitignore9
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt15
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp149
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.h145
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp37
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/indexenvironment.h95
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp61
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/queryenvironment.h63
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/querytermdata.cpp15
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/querytermdata.h30
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp51
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/querywrapper.h65
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp200
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankmanager.h92
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp304
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.h75
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp90
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchenvironment.h53
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp1166
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h464
-rw-r--r--streamingvisitors/src/vespa/snippetvisitor/.gitignore0
53 files changed, 4432 insertions, 0 deletions
diff --git a/streamingvisitors/.gitignore b/streamingvisitors/.gitignore
new file mode 100644
index 00000000000..31d449f16ee
--- /dev/null
+++ b/streamingvisitors/.gitignore
@@ -0,0 +1,3 @@
+testrun
+Makefile
+Testing
diff --git a/streamingvisitors/CMakeLists.txt b/streamingvisitors/CMakeLists.txt
new file mode 100644
index 00000000000..6623f743d3b
--- /dev/null
+++ b/streamingvisitors/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_define_module(
+ DEPENDS
+ fastos
+ fastlib_fast
+ vespalog
+ storage
+ storageapi
+ config_cloudconfig
+ document
+ vespalib
+ vdslib
+ vsm
+
+ EXTERNAL_DEPENDS
+ cppunit
+
+ LIBS
+ src/vespa/searchvisitor
+
+ TESTS
+ src/tests/hitcollector
+ src/tests/querywrapper
+ src/tests/searchvisitor
+)
diff --git a/streamingvisitors/OWNERS b/streamingvisitors/OWNERS
new file mode 100644
index 00000000000..f62763ab1da
--- /dev/null
+++ b/streamingvisitors/OWNERS
@@ -0,0 +1,2 @@
+balder
+geirst
diff --git a/streamingvisitors/doc/SearchVisitorProtocol.html b/streamingvisitors/doc/SearchVisitorProtocol.html
new file mode 100644
index 00000000000..55100929ac9
--- /dev/null
+++ b/streamingvisitors/doc/SearchVisitorProtocol.html
@@ -0,0 +1,93 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<HTML>
+<HEAD>
+ <META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=utf-8">
+ <TITLE></TITLE>
+ <META NAME="GENERATOR" CONTENT="OpenOffice.org 2.3 (Unix)">
+ <META NAME="AUTHOR" CONTENT="Henning Baldersheim">
+ <META NAME="CREATED" CONTENT="20080312;9103800">
+ <META NAME="CHANGEDBY" CONTENT="Henning Baldersheim">
+ <META NAME="CHANGED" CONTENT="20080314;8394700">
+ <META NAME="CHANGEDBY" CONTENT="Henning Baldersheim">
+ <META NAME="CHANGEDBY" CONTENT="Henning Baldersheim">
+ <STYLE TYPE="text/css">
+ <!--
+ @page { size: 8.5in 11in; margin: 0.79in }
+ P { margin-bottom: 0.08in }
+ H1 { margin-bottom: 0.08in }
+ H1.western { font-family: "Helvetica"; font-size: 16pt }
+ H1.cjk { font-family: "AR PL ShanHeiSun Uni"; font-size: 16pt }
+ H1.ctl { font-family: "Tahoma"; font-size: 16pt }
+ H2 { margin-bottom: 0.08in }
+ H2.western { font-family: "Helvetica"; font-size: 14pt; font-style: italic }
+ H2.cjk { font-family: "AR PL ShanHeiSun Uni"; font-size: 14pt; font-style: italic }
+ H2.ctl { font-family: "Arial Unicode MS"; font-size: 14pt; font-style: italic }
+ -->
+ </STYLE>
+</HEAD>
+<BODY LANG="en-US" DIR="LTR">
+<H1 CLASS="western">SearchVisitor design</H1>
+<P>The SearchVisitor is a visitor plugin running in the storaged
+binary. It processes queries and docsum requests and returns
+SearchResult and DocumentSummary objects to the client. It uses Vespa
+Streaming Matcher (VSM) to generate the search results and document
+summaries.</P>
+<P>Since the distributors in VDS do not have fdispatch capability,
+that capability is implemented in the QRS (the client). The QRS must collect
+all messages received, merge them and present the result as it sees fit.</P>
+<H2 CLASS="western">Initiation</H2>
+<P>The client sends down a createVisitor command with the following
+parameters set:</P>
+<P>Timeout : This is the query timeout.</P>
+<P>VisitorLibrary: &quot;SearchVisitor&quot;. Tells the framework to
+use the SearchVisitor visitor plugin, rather than the default
+DumpVisitor.</P>
+<P>VisitorParameters: Containing the following arguments for the
+SearchVisitor:</P>
+<UL>
+ <LI><P>&quot;query&quot;: The raw encoded query stack from QRS. It
+ has the same format as the query parameter in the QueryPacket sent
+ to indexed search.</P>
+ <LI><P>&quot;searchcluster&quot;: This identifies which
+ searchcluster is queried. The visitor uses this to choose the
+ correct config.</P>
+ <LI><P>&quot;summaryclass&quot;: Which summaryclass is wanted.</P>
+ <LI><P>&quot;summarycount&quot;: The number of summaries wanted.</P>
+ <LI><P>&quot;aggregation&quot;: The aggregation specification as specified
+ by the fs4 protocol. The aggregation options are the same as for
+ indexed search.
+ </P>
+ <LI><P>&quot;sort&quot;: The sort specification as specified by the fs4
+ protocol.</P>
+ <LI><P>&quot;unique&quot;: The field to do duplicate removal on.
+ </P>
+ <LI><P>&quot;rankprofile&quot;: Which rank profile to use. The default is 0.</P>
+ <LI><P>&quot;rankproperties&quot;: A set of properties to use in ranking for the backend.</P>
+</UL>
+<P>The backend will return all hits, but only the requested number of
+summaries. It is the client's responsibility to handle &quot;hits&quot;
+and &quot;offset&quot; query parameters.</P>
+<P>Only singlephase has been implemented as all the data are
+available anyway.</P>
+<P>&quot;aggregation&quot;, &quot;sort&quot; and &quot;unique&quot; are not limited to
+attributes as they are in indexed search. Every field has attribute
+semantics in streamed search.</P>
+<H2 CLASS="western">SearchResult</H2>
+<P>This is the message returned to the client after the search has
+been conducted.</P>
+<P>It contains a list of Hits each containing the documentid as known
+by VDS, and a rank identifying the relevance of the document with
+respect to the query. The list is sorted on descending rank.</P>
+<H2 CLASS="western">DocumentSummary</H2>
+<P>This is the message returned to the client after all the document
+summaries have been generated.</P>
+<P>It contains a list of Summary objects, each containing the
+documentId as known by VDS and the summary blob. The list is sorted
+on ascending docid. The summary blob is encoded as specified by the
+fastserver4 protocol.</P>
+<H2 CLASS="western">AggregationResult</H2>
+<P>This is the message returned containing the aggregation data. They
+follow the format of the aggregation packet used in the fs4 protocol.</P>
+</BODY>
+</HTML>
diff --git a/streamingvisitors/src/.gitignore b/streamingvisitors/src/.gitignore
new file mode 100644
index 00000000000..f7611c3f5a8
--- /dev/null
+++ b/streamingvisitors/src/.gitignore
@@ -0,0 +1,6 @@
+.cvsignore
+Makefile.ini
+config_command.sh
+doc
+project.dsw
+/streamingvisitors.mak
diff --git a/streamingvisitors/src/testlist.txt b/streamingvisitors/src/testlist.txt
new file mode 100644
index 00000000000..c4ff46e690c
--- /dev/null
+++ b/streamingvisitors/src/testlist.txt
@@ -0,0 +1,3 @@
+tests/hitcollector
+tests/querywrapper
+tests/searchvisitor
diff --git a/streamingvisitors/src/tests/hitcollector/.gitignore b/streamingvisitors/src/tests/hitcollector/.gitignore
new file mode 100644
index 00000000000..2ffc5acb4c1
--- /dev/null
+++ b/streamingvisitors/src/tests/hitcollector/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+hitcollector_test
+streamingvisitors_hitcollector_test_app
diff --git a/streamingvisitors/src/tests/hitcollector/CMakeLists.txt b/streamingvisitors/src/tests/hitcollector/CMakeLists.txt
new file mode 100644
index 00000000000..62c481e13a7
--- /dev/null
+++ b/streamingvisitors/src/tests/hitcollector/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(streamingvisitors_hitcollector_test_app
+ SOURCES
+ hitcollector.cpp
+ DEPENDS
+ streamingvisitors_searchvisitor
+)
+vespa_add_test(NAME streamingvisitors_hitcollector_test_app COMMAND streamingvisitors_hitcollector_test_app)
diff --git a/streamingvisitors/src/tests/hitcollector/DESC b/streamingvisitors/src/tests/hitcollector/DESC
new file mode 100644
index 00000000000..4933144da80
--- /dev/null
+++ b/streamingvisitors/src/tests/hitcollector/DESC
@@ -0,0 +1 @@
+Test of the hit collector used by the streaming searcher.
diff --git a/streamingvisitors/src/tests/hitcollector/FILES b/streamingvisitors/src/tests/hitcollector/FILES
new file mode 100644
index 00000000000..88a0d4ba4b3
--- /dev/null
+++ b/streamingvisitors/src/tests/hitcollector/FILES
@@ -0,0 +1 @@
+hitcollector.cpp
diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp
new file mode 100644
index 00000000000..4e008211223
--- /dev/null
+++ b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp
@@ -0,0 +1,314 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchvisitor/hitcollector.h>
+#include <vespa/vdslib/container/searchresult.h>
+#include <vespa/vsm/common/storagedocument.h>
+
+LOG_SETUP("hitcollector_test");
+
+using namespace document;
+using namespace search::fef;
+using namespace vespalib;
+using namespace vdslib;
+using namespace vsm;
+
+namespace storage {
+
+class HitCollectorTest : public vespalib::TestApp // test application exercising HitCollector
+{
+private:
+ void assertHit(SearchResult::RankType expRank, uint32_t hitNo, SearchResult & rs); // shorthand: the expected doc id equals hitNo
+ void assertHit(SearchResult::RankType expRank, uint32_t expDocId, uint32_t hitNo, SearchResult & rs); // checks rank and local doc id of hit number hitNo
+ void addHit(HitCollector &hc, uint32_t docId, double score, // adds one hit backed by a new document of _docType
+ const char *sortData = nullptr, size_t sortDataSize = 0); // the sort blob is optional
+ void testSimple();
+ void testGapsInDocId();
+ void testHeapProperty();
+ void testHeapPropertyWithSorting();
+ void testEmpty();
+ void testFeatureSet();
+
+ DocumentType _docType; // document type shared by all documents created in addHit()
+
+public:
+ HitCollectorTest();
+ int Main(); // runs all test cases above
+};
+
+HitCollectorTest::HitCollectorTest()
+ : _docType("testdoc", 0) // document type handed to every Document built in addHit()
+{
+}
+
+void
+HitCollectorTest::assertHit(SearchResult::RankType expRank, uint32_t hitNo, SearchResult & rs)
+{
+ assertHit(expRank, hitNo, hitNo, rs); // hitNo doubles as the expected doc id
+}
+
+void
+HitCollectorTest::assertHit(SearchResult::RankType expRank, uint32_t expDocId, uint32_t hitNo, SearchResult & rs)
+{
+    // Look up hit number hitNo in rs and compare its rank and local doc id
+    // with the expected values. The global doc id is fetched but not checked.
+    const char * globalDocId;
+    SearchResult::RankType rank;
+    const uint32_t lDocId = rs.getHit(hitNo, globalDocId, rank);
+    EXPECT_EQUAL(rank, expRank);
+    EXPECT_EQUAL(lDocId, expDocId);
+}
+
+void
+HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const char *sortData, size_t sortDataSize)
+{
+    document::Document::UP newDoc(new document::Document(_docType, DocumentId("doc::"))); // new document of the test type
+    StorageDocument::SP sdoc(new StorageDocument(std::move(newDoc))); // the document is moved into the wrapper
+    ASSERT_TRUE(sdoc->valid());
+    MatchData matchData(MatchData::params());
+    matchData.setDocId(docId); // the doc id reaches the collector via the match data
+    hc.addHit(sdoc, matchData, score, sortData, sortDataSize);
+}
+
+void
+HitCollectorTest::testSimple()
+{
+    HitCollector hc(5);
+
+    // Feed five hits with doc ids 0..4 and scores 10..14.
+    for (uint32_t docId = 0; docId < 5; ++docId) {
+        addHit(hc, docId, 10 + docId);
+    }
+    // Fill a search result twice; both rounds must give the same outcome.
+    for (size_t round = 0; round < 2; ++round) {
+        SearchResult sr;
+        hc.fillSearchResult(sr);
+        ASSERT_TRUE(sr.getHitCount() == 5);
+        // Hit number n is expected to hold doc id n with rank 10 + n.
+        for (uint32_t hitNo = 0; hitNo < 5; ++hitNo) {
+            assertHit(10 + hitNo, hitNo, sr);
+        }
+        // All five offered hits are present, so none was dropped.
+    }
+}
+
+void
+HitCollectorTest::testGapsInDocId()
+{
+    HitCollector hc(5);
+
+    // Feed five hits with the even doc ids 0, 2, .., 8 and score = doc id + 10.
+    for (uint32_t docId = 0; docId < 10; docId += 2) {
+        addHit(hc, docId, docId + 10);
+    }
+
+    // Merge from the heap into a search result.
+    SearchResult sr;
+    hc.fillSearchResult(sr);
+
+    ASSERT_TRUE(sr.getHitCount() == 5);
+    // The gaps in the doc id sequence must survive: hit number n holds
+    // doc id 2n with rank 2n + 10.
+    for (uint32_t hitNo = 0; hitNo < 5; ++hitNo) {
+        assertHit(hitNo * 2 + 10, hitNo * 2, hitNo, sr);
+    }
+}
+
+void
+HitCollectorTest::testHeapProperty()
+{
+    {
+        HitCollector hc(3);
+        // ascending scores: six hits offered, the three best (docs 3..5) are expected
+        for (uint32_t docId = 0; docId < 6; ++docId) {
+            addHit(hc, docId, docId + 10);
+        }
+        SearchResult sr;
+        hc.fillSearchResult(sr);
+        ASSERT_TRUE(sr.getHitCount() == 3);
+        for (uint32_t hitNo = 0; hitNo < 3; ++hitNo) {
+            assertHit(13 + hitNo, 3 + hitNo, hitNo, sr);
+        }
+    }
+    {
+        HitCollector hc(3);
+        // descending scores: the three best are the first ones offered (docs 0..2)
+        for (uint32_t docId = 0; docId < 6; ++docId) {
+            addHit(hc, docId, 10 - docId);
+        }
+        SearchResult sr;
+        hc.fillSearchResult(sr);
+        ASSERT_TRUE(sr.getHitCount() == 3);
+        for (uint32_t hitNo = 0; hitNo < 3; ++hitNo) {
+            assertHit(10 - hitNo, hitNo, hitNo, sr);
+        }
+    }
+    {
+        HitCollector hc(3);
+        // identical scores: the first three docs offered are expected to stay
+        for (uint32_t docId = 0; docId < 6; ++docId) {
+            addHit(hc, docId, 10);
+        }
+        SearchResult sr;
+        hc.fillSearchResult(sr);
+        ASSERT_TRUE(sr.getHitCount() == 3);
+        for (uint32_t hitNo = 0; hitNo < 3; ++hitNo) {
+            assertHit(10, hitNo, hitNo, sr);
+        }
+    }
+}
+
+void
+HitCollectorTest::testHeapPropertyWithSorting()
+{
+    // One-byte sort blobs 'a' .. 'f'. Every hit added below is given a
+    // pointer into this vector together with a blob size of 1; the expected
+    // results show that a hit with blob 'a' is preferred over one with 'b'.
+    std::vector<char> sortData;
+    for (const char blob : {'a', 'b', 'c', 'd', 'e', 'f'}) {
+        sortData.push_back(blob);
+    }
+    {
+        HitCollector hc(3);
+        // blobs in doc id order: docs 0..2 carry 'a'..'c' and are expected to win
+        for (uint32_t docId = 0; docId < 6; ++docId) {
+            addHit(hc, docId, docId + 10, &sortData[docId], 1);
+        }
+        SearchResult sr;
+        hc.fillSearchResult(sr);
+        ASSERT_TRUE(sr.getHitCount() == 3);
+        for (uint32_t hitNo = 0; hitNo < 3; ++hitNo) {
+            assertHit(10 + hitNo, hitNo, hitNo, sr);
+        }
+    }
+    {
+        HitCollector hc(3);
+        // blobs in reverse doc id order: docs 3..5 carry 'c'..'a' and are expected to win
+        for (uint32_t docId = 0; docId < 6; ++docId) {
+            addHit(hc, docId, docId + 10, &sortData[5 - docId], 1);
+        }
+        SearchResult sr;
+        hc.fillSearchResult(sr);
+        ASSERT_TRUE(sr.getHitCount() == 3);
+        for (uint32_t hitNo = 0; hitNo < 3; ++hitNo) {
+            assertHit(13 + hitNo, 3 + hitNo, hitNo, sr);
+        }
+    }
+    {
+        HitCollector hc(3);
+        // every hit shares the same blob: the first three docs are expected to stay
+        for (uint32_t docId = 0; docId < 6; ++docId) {
+            addHit(hc, docId, 10, &sortData[0], 1);
+        }
+        SearchResult sr;
+        hc.fillSearchResult(sr);
+        ASSERT_TRUE(sr.getHitCount() == 3);
+        for (uint32_t hitNo = 0; hitNo < 3; ++hitNo) {
+            assertHit(10, hitNo, hitNo, sr);
+        }
+    }
+}
+
+void
+HitCollectorTest::testEmpty()
+{
+ HitCollector hc(0); // collector constructed with 0
+ addHit(hc, 0, 0); // offer one hit (doc id 0, score 0) anyway
+ SearchResult rs;
+ hc.fillSearchResult(rs);
+ ASSERT_TRUE(rs.getHitCount() == 0); // the offered hit must not appear in the result
+}
+
+class MyRankProgram : public HitCollector::IRankProgram // rank program stub producing predictable feature values
+{
+private:
+    MatchData _matchData; // created with three features, see constructor
+
+public:
+    MyRankProgram() : _matchData(MatchData::params().numFeatures(3)) {}
+    const search::fef::MatchData &run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &) override {
+        _matchData.setDocId(docid);
+        for (uint32_t feature = 0; feature < 3; ++feature) { // feature k = docid + 10 * (k + 1)
+            *_matchData.resolveFeature(feature) = docid + 10 * (feature + 1);
+        }
+        return _matchData;
+    }
+};
+
+void
+HitCollectorTest::testFeatureSet() // feature set must cover exactly the hits that end up in the result
+{
+    HitCollector hc(3);
+    // Five hits are offered; the three highest scores (docs 1, 3, 4) are marked "on heap".
+    addHit(hc, 0, 10);
+    addHit(hc, 1, 50); // on heap
+    addHit(hc, 2, 20);
+    addHit(hc, 3, 40); // on heap
+    addHit(hc, 4, 30); // on heap
+
+    std::vector<vespalib::string> names; // names to report in the feature set
+    std::vector<FeatureHandle> handles;  // handles to read, in the same order as names
+    names.push_back("foo");
+    names.push_back("bar");
+    handles.push_back(0); // "foo": feature 0, which MyRankProgram sets to docid + 10
+    handles.push_back(2); // "bar": feature 2, which MyRankProgram sets to docid + 30
+
+    MyRankProgram rankProgram;
+    search::FeatureSet::SP sf = hc.getFeatureSet(rankProgram, names, handles);
+
+    EXPECT_EQUAL(sf->getNames().size(), 2u);
+    EXPECT_EQUAL(sf->getNames()[0], "foo");
+    EXPECT_EQUAL(sf->getNames()[1], "bar");
+    EXPECT_EQUAL(sf->numFeatures(), 2u);
+    EXPECT_EQUAL(sf->numDocs(), 3u);
+    {
+        const search::feature_t * f = sf->getFeaturesByDocId(1);
+        ASSERT_TRUE(f != nullptr);
+        EXPECT_EQUAL(f[0], 11); // 10 + docId
+        EXPECT_EQUAL(f[1], 31); // 30 + docId
+    }
+    {
+        const search::feature_t * f = sf->getFeaturesByDocId(3);
+        ASSERT_TRUE(f != nullptr);
+        EXPECT_EQUAL(f[0], 13); // 10 + docId
+        EXPECT_EQUAL(f[1], 33); // 30 + docId
+    }
+    {
+        const search::feature_t * f = sf->getFeaturesByDocId(4);
+        ASSERT_TRUE(f != nullptr);
+        EXPECT_EQUAL(f[0], 14); // 10 + docId
+        EXPECT_EQUAL(f[1], 34); // 30 + docId
+    }
+    ASSERT_TRUE(sf->getFeaturesByDocId(0) == nullptr); // docs 0 and 2 are not among the kept hits,
+    ASSERT_TRUE(sf->getFeaturesByDocId(2) == nullptr); // so no features are expected for them
+    // Computing the feature set must leave the search result itself unaffected.
+    SearchResult sr;
+    hc.fillSearchResult(sr);
+    ASSERT_TRUE(sr.getHitCount() == 3);
+    assertHit(50, 1, 0, sr);
+    assertHit(40, 3, 1, sr);
+    assertHit(30, 4, 2, sr);
+}
+
+int
+HitCollectorTest::Main() // entry point: runs every test case in sequence
+{
+ TEST_INIT("hitcollector_test");
+
+ testSimple();
+ testGapsInDocId();
+ testHeapProperty();
+ testHeapPropertyWithSorting();
+ testEmpty();
+ testFeatureSet();
+
+ TEST_DONE();
+}
+
+} // namespace storage
+
+TEST_APPHOOK(storage::HitCollectorTest)
diff --git a/streamingvisitors/src/tests/querywrapper/.gitignore b/streamingvisitors/src/tests/querywrapper/.gitignore
new file mode 100644
index 00000000000..3c8a4b1c9f8
--- /dev/null
+++ b/streamingvisitors/src/tests/querywrapper/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+querywrapper_test
+streamingvisitors_querywrapper_test_app
diff --git a/streamingvisitors/src/tests/querywrapper/CMakeLists.txt b/streamingvisitors/src/tests/querywrapper/CMakeLists.txt
new file mode 100644
index 00000000000..501b1eee1a9
--- /dev/null
+++ b/streamingvisitors/src/tests/querywrapper/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(streamingvisitors_querywrapper_test_app
+ SOURCES
+ querywrapper.cpp
+ DEPENDS
+ streamingvisitors_searchvisitor
+)
+vespa_add_test(NAME streamingvisitors_querywrapper_test_app COMMAND streamingvisitors_querywrapper_test_app)
diff --git a/streamingvisitors/src/tests/querywrapper/DESC b/streamingvisitors/src/tests/querywrapper/DESC
new file mode 100644
index 00000000000..dfdd9d55a8e
--- /dev/null
+++ b/streamingvisitors/src/tests/querywrapper/DESC
@@ -0,0 +1 @@
+Test of the query wrapper used by the rank processor.
diff --git a/streamingvisitors/src/tests/querywrapper/FILES b/streamingvisitors/src/tests/querywrapper/FILES
new file mode 100644
index 00000000000..91138d25b2e
--- /dev/null
+++ b/streamingvisitors/src/tests/querywrapper/FILES
@@ -0,0 +1 @@
+querywrapper.cpp
diff --git a/streamingvisitors/src/tests/querywrapper/querywrapper.cpp b/streamingvisitors/src/tests/querywrapper/querywrapper.cpp
new file mode 100644
index 00000000000..b717962d920
--- /dev/null
+++ b/streamingvisitors/src/tests/querywrapper/querywrapper.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <iostream>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+
+#include <vespa/searchvisitor/querywrapper.h>
+
+using namespace search;
+using namespace search::query;
+
+namespace storage {
+
+class QueryWrapperTest : public vespalib::TestApp // test application exercising QueryWrapper
+{
+private:
+ void testQueryWrapper(); // the single test case, run from Main()
+
+public:
+ int Main();
+};
+
+void
+QueryWrapperTest::testQueryWrapper() // checks the term list QueryWrapper builds for AND(a, PHRASE(b, c, d), e)
+{
+    EmptyQueryNodeResult empty;
+    PhraseQueryNode * null = nullptr; // typed null pointer to compare getParent() against
+    {
+        QueryBuilder<SimpleQueryNodeTypes> builder;
+        builder.addAnd(3);
+        {
+            builder.addStringTerm("a", "", 0, Weight(0));
+            builder.addPhrase(3, "", 0, Weight(0));
+            {
+                builder.addStringTerm("b", "", 0, Weight(0));
+                builder.addStringTerm("c", "", 0, Weight(0));
+                builder.addStringTerm("d", "", 0, Weight(0));
+            }
+            builder.addStringTerm("e", "", 0, Weight(0));
+        }
+        Node::UP node = builder.build();
+        vespalib::string stackDump = StackDumpCreator::create(*node);
+        Query q(empty, stackDump);
+        QueryWrapper wrap(q);
+        QueryWrapper::TermList & tl = wrap.getTermList();
+        // The wrapper's term list must match the query's leaf terms one to one.
+        QueryTermList terms;
+        q.getLeafs(terms);
+        ASSERT_TRUE(tl.size() == 5 && terms.size() == 5);
+        for (size_t i = 0; i < 5; ++i) {
+            EXPECT_EQUAL(tl[i].getTerm(), terms[i]);
+            std::cout << "t[" << i << "]:" << terms[i] << std::endl;
+        }
+        // The query holds exactly one phrase node.
+        QueryNodeRefList phrases;
+        q.getPhrases(phrases);
+        for (size_t i = 0; i < phrases.size(); ++i) {
+            std::cout << "p[" << i << "]:" << phrases[i] << std::endl;
+        }
+        EXPECT_EQUAL(phrases.size(), 1u);
+        ASSERT_TRUE(phrases.size() == 1); // guards the phrases[0] accesses below
+        EXPECT_EQUAL(tl[0].getParent(), null);
+        EXPECT_EQUAL(tl[1].getParent(), phrases[0]);
+        EXPECT_EQUAL(tl[2].getParent(), phrases[0]);
+        EXPECT_EQUAL(tl[3].getParent(), phrases[0]);
+        EXPECT_EQUAL(tl[4].getParent(), null);
+        // Expected index: position inside the phrase for b, c, d; 0 for a and e.
+        EXPECT_EQUAL(tl[0].getIndex(), 0u);
+        EXPECT_EQUAL(tl[1].getIndex(), 0u);
+        EXPECT_EQUAL(tl[2].getIndex(), 1u);
+        EXPECT_EQUAL(tl[3].getIndex(), 2u);
+        EXPECT_EQUAL(tl[4].getIndex(), 0u);
+        // Only b starts the phrase.
+        EXPECT_TRUE(!tl[0].isFirstPhraseTerm());
+        EXPECT_TRUE( tl[1].isFirstPhraseTerm());
+        EXPECT_TRUE(!tl[2].isFirstPhraseTerm());
+        EXPECT_TRUE(!tl[3].isFirstPhraseTerm());
+        EXPECT_TRUE(!tl[4].isFirstPhraseTerm());
+        // b, c and d are phrase terms; a and e are not.
+        EXPECT_TRUE(!tl[0].isPhraseTerm());
+        EXPECT_TRUE( tl[1].isPhraseTerm());
+        EXPECT_TRUE( tl[2].isPhraseTerm());
+        EXPECT_TRUE( tl[3].isPhraseTerm());
+        EXPECT_TRUE(!tl[4].isPhraseTerm());
+        // Expected position adjustment: 2 for each phrase term, 0 otherwise.
+        EXPECT_EQUAL(tl[0].getPosAdjust(), 0u);
+        EXPECT_EQUAL(tl[1].getPosAdjust(), 2u);
+        EXPECT_EQUAL(tl[2].getPosAdjust(), 2u);
+        EXPECT_EQUAL(tl[3].getPosAdjust(), 2u);
+        EXPECT_EQUAL(tl[4].getPosAdjust(), 0u);
+    }
+}
+
+int
+QueryWrapperTest::Main() // entry point: runs the single query wrapper test
+{
+ TEST_INIT("querywrapper_test");
+
+ testQueryWrapper();
+
+ TEST_DONE();
+}
+
+} // namespace storage
+
+TEST_APPHOOK(storage::QueryWrapperTest)
diff --git a/streamingvisitors/src/tests/searchvisitor/.gitignore b/streamingvisitors/src/tests/searchvisitor/.gitignore
new file mode 100644
index 00000000000..543f6428e3c
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/.gitignore
@@ -0,0 +1,4 @@
+/.depend
+/Makefile
+/searchvisitor_test
+streamingvisitors_searchvisitor_test_app
diff --git a/streamingvisitors/src/tests/searchvisitor/CMakeLists.txt b/streamingvisitors/src/tests/searchvisitor/CMakeLists.txt
new file mode 100644
index 00000000000..83abfde144a
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(streamingvisitors_searchvisitor_test_app
+ SOURCES
+ searchvisitor.cpp
+ DEPENDS
+ streamingvisitors_searchvisitor
+)
+vespa_add_test(NAME streamingvisitors_searchvisitor_test_app COMMAND streamingvisitors_searchvisitor_test_app)
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/documenttypes.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/documenttypes.cfg
new file mode 100644
index 00000000000..7bb863dc8e2
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/documenttypes.cfg
@@ -0,0 +1,317 @@
+enablecompression false
+documenttype[0].id 1843830320
+documenttype[0].name "maptest"
+documenttype[0].version 0
+documenttype[0].headerstruct -91088113
+documenttype[0].bodystruct -1659731740
+documenttype[0].inherits[0].id 8
+documenttype[0].datatype[0].id 3474528
+documenttype[0].datatype[0].type STRUCT
+documenttype[0].datatype[0].array.element.id 0
+documenttype[0].datatype[0].map.key.id 0
+documenttype[0].datatype[0].map.value.id 0
+documenttype[0].datatype[0].wset.key.id 0
+documenttype[0].datatype[0].wset.createifnonexistent false
+documenttype[0].datatype[0].wset.removeifzero false
+documenttype[0].datatype[0].annotationref.annotation.id 0
+documenttype[0].datatype[0].sstruct.name "s1"
+documenttype[0].datatype[0].sstruct.version 0
+documenttype[0].datatype[0].sstruct.compression.type NONE
+documenttype[0].datatype[0].sstruct.compression.level 0
+documenttype[0].datatype[0].sstruct.compression.threshold 95
+documenttype[0].datatype[0].sstruct.compression.minsize 200
+documenttype[0].datatype[0].sstruct.field[0].name "a"
+documenttype[0].datatype[0].sstruct.field[0].id 493339625
+documenttype[0].datatype[0].sstruct.field[0].id_v6 703514648
+documenttype[0].datatype[0].sstruct.field[0].datatype 2
+documenttype[0].datatype[0].sstruct.field[1].name "b"
+documenttype[0].datatype[0].sstruct.field[1].id 441632370
+documenttype[0].datatype[0].sstruct.field[1].id_v6 1420600727
+documenttype[0].datatype[0].sstruct.field[1].datatype 2
+documenttype[0].datatype[1].id 339965458
+documenttype[0].datatype[1].type MAP
+documenttype[0].datatype[1].array.element.id 0
+documenttype[0].datatype[1].map.key.id 2
+documenttype[0].datatype[1].map.value.id 2
+documenttype[0].datatype[1].wset.key.id 0
+documenttype[0].datatype[1].wset.createifnonexistent false
+documenttype[0].datatype[1].wset.removeifzero false
+documenttype[0].datatype[1].annotationref.annotation.id 0
+documenttype[0].datatype[1].sstruct.name ""
+documenttype[0].datatype[1].sstruct.version 0
+documenttype[0].datatype[1].sstruct.compression.type NONE
+documenttype[0].datatype[1].sstruct.compression.level 0
+documenttype[0].datatype[1].sstruct.compression.threshold 95
+documenttype[0].datatype[1].sstruct.compression.minsize 200
+documenttype[0].datatype[2].id 1888564261
+documenttype[0].datatype[2].type MAP
+documenttype[0].datatype[2].array.element.id 0
+documenttype[0].datatype[2].map.key.id 2
+documenttype[0].datatype[2].map.value.id 3474528
+documenttype[0].datatype[2].wset.key.id 0
+documenttype[0].datatype[2].wset.createifnonexistent false
+documenttype[0].datatype[2].wset.removeifzero false
+documenttype[0].datatype[2].annotationref.annotation.id 0
+documenttype[0].datatype[2].sstruct.name ""
+documenttype[0].datatype[2].sstruct.version 0
+documenttype[0].datatype[2].sstruct.compression.type NONE
+documenttype[0].datatype[2].sstruct.compression.level 0
+documenttype[0].datatype[2].sstruct.compression.threshold 95
+documenttype[0].datatype[2].sstruct.compression.minsize 200
+documenttype[0].datatype[3].id -1486737430
+documenttype[0].datatype[3].type ARRAY
+documenttype[0].datatype[3].array.element.id 2
+documenttype[0].datatype[3].map.key.id 0
+documenttype[0].datatype[3].map.value.id 0
+documenttype[0].datatype[3].wset.key.id 0
+documenttype[0].datatype[3].wset.createifnonexistent false
+documenttype[0].datatype[3].wset.removeifzero false
+documenttype[0].datatype[3].annotationref.annotation.id 0
+documenttype[0].datatype[3].sstruct.name ""
+documenttype[0].datatype[3].sstruct.version 0
+documenttype[0].datatype[3].sstruct.compression.type NONE
+documenttype[0].datatype[3].sstruct.compression.level 0
+documenttype[0].datatype[3].sstruct.compression.threshold 95
+documenttype[0].datatype[3].sstruct.compression.minsize 200
+documenttype[0].datatype[4].id -1220861393
+documenttype[0].datatype[4].type MAP
+documenttype[0].datatype[4].array.element.id 0
+documenttype[0].datatype[4].map.key.id 2
+documenttype[0].datatype[4].map.value.id -1486737430
+documenttype[0].datatype[4].wset.key.id 0
+documenttype[0].datatype[4].wset.createifnonexistent false
+documenttype[0].datatype[4].wset.removeifzero false
+documenttype[0].datatype[4].annotationref.annotation.id 0
+documenttype[0].datatype[4].sstruct.name ""
+documenttype[0].datatype[4].sstruct.version 0
+documenttype[0].datatype[4].sstruct.compression.type NONE
+documenttype[0].datatype[4].sstruct.compression.level 0
+documenttype[0].datatype[4].sstruct.compression.threshold 95
+documenttype[0].datatype[4].sstruct.compression.minsize 200
+documenttype[0].datatype[5].id 1070047409
+documenttype[0].datatype[5].type MAP
+documenttype[0].datatype[5].array.element.id 0
+documenttype[0].datatype[5].map.key.id 2
+documenttype[0].datatype[5].map.value.id 339965458
+documenttype[0].datatype[5].wset.key.id 0
+documenttype[0].datatype[5].wset.createifnonexistent false
+documenttype[0].datatype[5].wset.removeifzero false
+documenttype[0].datatype[5].annotationref.annotation.id 0
+documenttype[0].datatype[5].sstruct.name ""
+documenttype[0].datatype[5].sstruct.version 0
+documenttype[0].datatype[5].sstruct.compression.type NONE
+documenttype[0].datatype[5].sstruct.compression.level 0
+documenttype[0].datatype[5].sstruct.compression.threshold 95
+documenttype[0].datatype[5].sstruct.compression.minsize 200
+documenttype[0].datatype[6].id -91088113
+documenttype[0].datatype[6].type STRUCT
+documenttype[0].datatype[6].array.element.id 0
+documenttype[0].datatype[6].map.key.id 0
+documenttype[0].datatype[6].map.value.id 0
+documenttype[0].datatype[6].wset.key.id 0
+documenttype[0].datatype[6].wset.createifnonexistent false
+documenttype[0].datatype[6].wset.removeifzero false
+documenttype[0].datatype[6].annotationref.annotation.id 0
+documenttype[0].datatype[6].sstruct.name "maptest.header"
+documenttype[0].datatype[6].sstruct.version 0
+documenttype[0].datatype[6].sstruct.compression.type NONE
+documenttype[0].datatype[6].sstruct.compression.level 0
+documenttype[0].datatype[6].sstruct.compression.threshold 95
+documenttype[0].datatype[6].sstruct.compression.minsize 200
+documenttype[0].datatype[6].sstruct.field[0].name "name"
+documenttype[0].datatype[6].sstruct.field[0].id 1160796772
+documenttype[0].datatype[6].sstruct.field[0].id_v6 774203930
+documenttype[0].datatype[6].sstruct.field[0].datatype 2
+documenttype[0].datatype[6].sstruct.field[1].name "m1"
+documenttype[0].datatype[6].sstruct.field[1].id 656260193
+documenttype[0].datatype[6].sstruct.field[1].id_v6 1013611640
+documenttype[0].datatype[6].sstruct.field[1].datatype 339965458
+documenttype[0].datatype[6].sstruct.field[2].name "m2"
+documenttype[0].datatype[6].sstruct.field[2].id 1105173090
+documenttype[0].datatype[6].sstruct.field[2].id_v6 1026497887
+documenttype[0].datatype[6].sstruct.field[2].datatype 1888564261
+documenttype[0].datatype[6].sstruct.field[3].name "m3"
+documenttype[0].datatype[6].sstruct.field[3].id 1834987989
+documenttype[0].datatype[6].sstruct.field[3].id_v6 1365320273
+documenttype[0].datatype[6].sstruct.field[3].datatype -1220861393
+documenttype[0].datatype[6].sstruct.field[4].name "m4"
+documenttype[0].datatype[6].sstruct.field[4].id 1696105521
+documenttype[0].datatype[6].sstruct.field[4].id_v6 1636310067
+documenttype[0].datatype[6].sstruct.field[4].datatype 1070047409
+documenttype[0].datatype[7].id -1659731740
+documenttype[0].datatype[7].type STRUCT
+documenttype[0].datatype[7].array.element.id 0
+documenttype[0].datatype[7].map.key.id 0
+documenttype[0].datatype[7].map.value.id 0
+documenttype[0].datatype[7].wset.key.id 0
+documenttype[0].datatype[7].wset.createifnonexistent false
+documenttype[0].datatype[7].wset.removeifzero false
+documenttype[0].datatype[7].annotationref.annotation.id 0
+documenttype[0].datatype[7].sstruct.name "maptest.body"
+documenttype[0].datatype[7].sstruct.version 0
+documenttype[0].datatype[7].sstruct.compression.type NONE
+documenttype[0].datatype[7].sstruct.compression.level 0
+documenttype[0].datatype[7].sstruct.compression.threshold 95
+documenttype[0].datatype[7].sstruct.compression.minsize 200
+documenttype[1].id -753106277
+documenttype[1].name "maptest_search"
+documenttype[1].version 0
+documenttype[1].headerstruct 919697476
+documenttype[1].bodystruct -125720743
+documenttype[1].inherits[0].id 8
+documenttype[1].datatype[0].id 3474528
+documenttype[1].datatype[0].type STRUCT
+documenttype[1].datatype[0].array.element.id 0
+documenttype[1].datatype[0].map.key.id 0
+documenttype[1].datatype[0].map.value.id 0
+documenttype[1].datatype[0].wset.key.id 0
+documenttype[1].datatype[0].wset.createifnonexistent false
+documenttype[1].datatype[0].wset.removeifzero false
+documenttype[1].datatype[0].annotationref.annotation.id 0
+documenttype[1].datatype[0].sstruct.name "s1"
+documenttype[1].datatype[0].sstruct.version 0
+documenttype[1].datatype[0].sstruct.compression.type NONE
+documenttype[1].datatype[0].sstruct.compression.level 0
+documenttype[1].datatype[0].sstruct.compression.threshold 95
+documenttype[1].datatype[0].sstruct.compression.minsize 200
+documenttype[1].datatype[0].sstruct.field[0].name "a"
+documenttype[1].datatype[0].sstruct.field[0].id 493339625
+documenttype[1].datatype[0].sstruct.field[0].id_v6 703514648
+documenttype[1].datatype[0].sstruct.field[0].datatype 2
+documenttype[1].datatype[0].sstruct.field[1].name "b"
+documenttype[1].datatype[0].sstruct.field[1].id 441632370
+documenttype[1].datatype[0].sstruct.field[1].id_v6 1420600727
+documenttype[1].datatype[0].sstruct.field[1].datatype 2
+documenttype[1].datatype[1].id 339965458
+documenttype[1].datatype[1].type MAP
+documenttype[1].datatype[1].array.element.id 0
+documenttype[1].datatype[1].map.key.id 2
+documenttype[1].datatype[1].map.value.id 2
+documenttype[1].datatype[1].wset.key.id 0
+documenttype[1].datatype[1].wset.createifnonexistent false
+documenttype[1].datatype[1].wset.removeifzero false
+documenttype[1].datatype[1].annotationref.annotation.id 0
+documenttype[1].datatype[1].sstruct.name ""
+documenttype[1].datatype[1].sstruct.version 0
+documenttype[1].datatype[1].sstruct.compression.type NONE
+documenttype[1].datatype[1].sstruct.compression.level 0
+documenttype[1].datatype[1].sstruct.compression.threshold 95
+documenttype[1].datatype[1].sstruct.compression.minsize 200
+documenttype[1].datatype[2].id 1888564261
+documenttype[1].datatype[2].type MAP
+documenttype[1].datatype[2].array.element.id 0
+documenttype[1].datatype[2].map.key.id 2
+documenttype[1].datatype[2].map.value.id 3474528
+documenttype[1].datatype[2].wset.key.id 0
+documenttype[1].datatype[2].wset.createifnonexistent false
+documenttype[1].datatype[2].wset.removeifzero false
+documenttype[1].datatype[2].annotationref.annotation.id 0
+documenttype[1].datatype[2].sstruct.name ""
+documenttype[1].datatype[2].sstruct.version 0
+documenttype[1].datatype[2].sstruct.compression.type NONE
+documenttype[1].datatype[2].sstruct.compression.level 0
+documenttype[1].datatype[2].sstruct.compression.threshold 95
+documenttype[1].datatype[2].sstruct.compression.minsize 200
+documenttype[1].datatype[3].id -1486737430
+documenttype[1].datatype[3].type ARRAY
+documenttype[1].datatype[3].array.element.id 2
+documenttype[1].datatype[3].map.key.id 0
+documenttype[1].datatype[3].map.value.id 0
+documenttype[1].datatype[3].wset.key.id 0
+documenttype[1].datatype[3].wset.createifnonexistent false
+documenttype[1].datatype[3].wset.removeifzero false
+documenttype[1].datatype[3].annotationref.annotation.id 0
+documenttype[1].datatype[3].sstruct.name ""
+documenttype[1].datatype[3].sstruct.version 0
+documenttype[1].datatype[3].sstruct.compression.type NONE
+documenttype[1].datatype[3].sstruct.compression.level 0
+documenttype[1].datatype[3].sstruct.compression.threshold 95
+documenttype[1].datatype[3].sstruct.compression.minsize 200
+documenttype[1].datatype[4].id -1220861393
+documenttype[1].datatype[4].type MAP
+documenttype[1].datatype[4].array.element.id 0
+documenttype[1].datatype[4].map.key.id 2
+documenttype[1].datatype[4].map.value.id -1486737430
+documenttype[1].datatype[4].wset.key.id 0
+documenttype[1].datatype[4].wset.createifnonexistent false
+documenttype[1].datatype[4].wset.removeifzero false
+documenttype[1].datatype[4].annotationref.annotation.id 0
+documenttype[1].datatype[4].sstruct.name ""
+documenttype[1].datatype[4].sstruct.version 0
+documenttype[1].datatype[4].sstruct.compression.type NONE
+documenttype[1].datatype[4].sstruct.compression.level 0
+documenttype[1].datatype[4].sstruct.compression.threshold 95
+documenttype[1].datatype[4].sstruct.compression.minsize 200
+documenttype[1].datatype[5].id 1070047409
+documenttype[1].datatype[5].type MAP
+documenttype[1].datatype[5].array.element.id 0
+documenttype[1].datatype[5].map.key.id 2
+documenttype[1].datatype[5].map.value.id 339965458
+documenttype[1].datatype[5].wset.key.id 0
+documenttype[1].datatype[5].wset.createifnonexistent false
+documenttype[1].datatype[5].wset.removeifzero false
+documenttype[1].datatype[5].annotationref.annotation.id 0
+documenttype[1].datatype[5].sstruct.name ""
+documenttype[1].datatype[5].sstruct.version 0
+documenttype[1].datatype[5].sstruct.compression.type NONE
+documenttype[1].datatype[5].sstruct.compression.level 0
+documenttype[1].datatype[5].sstruct.compression.threshold 95
+documenttype[1].datatype[5].sstruct.compression.minsize 200
+documenttype[1].datatype[6].id 919697476
+documenttype[1].datatype[6].type STRUCT
+documenttype[1].datatype[6].array.element.id 0
+documenttype[1].datatype[6].map.key.id 0
+documenttype[1].datatype[6].map.value.id 0
+documenttype[1].datatype[6].wset.key.id 0
+documenttype[1].datatype[6].wset.createifnonexistent false
+documenttype[1].datatype[6].wset.removeifzero false
+documenttype[1].datatype[6].annotationref.annotation.id 0
+documenttype[1].datatype[6].sstruct.name "maptest_search.header"
+documenttype[1].datatype[6].sstruct.version 0
+documenttype[1].datatype[6].sstruct.compression.type NONE
+documenttype[1].datatype[6].sstruct.compression.level 0
+documenttype[1].datatype[6].sstruct.compression.threshold 95
+documenttype[1].datatype[6].sstruct.compression.minsize 200
+documenttype[1].datatype[6].sstruct.field[0].name "name"
+documenttype[1].datatype[6].sstruct.field[0].id 1160796772
+documenttype[1].datatype[6].sstruct.field[0].id_v6 774203930
+documenttype[1].datatype[6].sstruct.field[0].datatype 2
+documenttype[1].datatype[6].sstruct.field[1].name "m1"
+documenttype[1].datatype[6].sstruct.field[1].id 656260193
+documenttype[1].datatype[6].sstruct.field[1].id_v6 1013611640
+documenttype[1].datatype[6].sstruct.field[1].datatype 339965458
+documenttype[1].datatype[6].sstruct.field[2].name "m2"
+documenttype[1].datatype[6].sstruct.field[2].id 1105173090
+documenttype[1].datatype[6].sstruct.field[2].id_v6 1026497887
+documenttype[1].datatype[6].sstruct.field[2].datatype 1888564261
+documenttype[1].datatype[6].sstruct.field[3].name "m3"
+documenttype[1].datatype[6].sstruct.field[3].id 1834987989
+documenttype[1].datatype[6].sstruct.field[3].id_v6 1365320273
+documenttype[1].datatype[6].sstruct.field[3].datatype -1220861393
+documenttype[1].datatype[6].sstruct.field[4].name "m4"
+documenttype[1].datatype[6].sstruct.field[4].id 1696105521
+documenttype[1].datatype[6].sstruct.field[4].id_v6 1636310067
+documenttype[1].datatype[6].sstruct.field[4].datatype 1070047409
+documenttype[1].datatype[6].sstruct.field[5].name "rankfeatures"
+documenttype[1].datatype[6].sstruct.field[5].id 1883197392
+documenttype[1].datatype[6].sstruct.field[5].id_v6 699950698
+documenttype[1].datatype[6].sstruct.field[5].datatype 2
+documenttype[1].datatype[6].sstruct.field[6].name "summaryfeatures"
+documenttype[1].datatype[6].sstruct.field[6].id 1840337115
+documenttype[1].datatype[6].sstruct.field[6].id_v6 1981648971
+documenttype[1].datatype[6].sstruct.field[6].datatype 2
+documenttype[1].datatype[7].id -125720743
+documenttype[1].datatype[7].type STRUCT
+documenttype[1].datatype[7].array.element.id 0
+documenttype[1].datatype[7].map.key.id 0
+documenttype[1].datatype[7].map.value.id 0
+documenttype[1].datatype[7].wset.key.id 0
+documenttype[1].datatype[7].wset.createifnonexistent false
+documenttype[1].datatype[7].wset.removeifzero false
+documenttype[1].datatype[7].annotationref.annotation.id 0
+documenttype[1].datatype[7].sstruct.name "maptest_search.body"
+documenttype[1].datatype[7].sstruct.version 0
+documenttype[1].datatype[7].sstruct.compression.type NONE
+documenttype[1].datatype[7].sstruct.compression.level 0
+documenttype[1].datatype[7].sstruct.compression.threshold 95
+documenttype[1].datatype[7].sstruct.compression.minsize 200
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.aaa.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.aaa.cfg
new file mode 100644
index 00000000000..09e13ed0f8b
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.aaa.cfg
@@ -0,0 +1,11 @@
+length 256
+max_match_candidates 1000
+max_matches 3
+min_length 128
+prefix true
+stem_max_extend 3
+stem_min_length 5
+surround_max 128
+winsize 200
+winsize_fallback_multiplier 10.0
+override[0]
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.simple.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.simple.cfg
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/juniperrc.simple.cfg
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.aaa.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.aaa.cfg
new file mode 100644
index 00000000000..cf3ee6a7179
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.aaa.cfg
@@ -0,0 +1,43 @@
+rankprofile[2]
+rankprofile[0].name "default"
+rankprofile[0].fef.property[15]
+rankprofile[0].fef.property[00].name "vespa.summary.feature"
+rankprofile[0].fef.property[00].value "fieldTermMatch(m1.key,0).firstPosition"
+rankprofile[0].fef.property[01].name "vespa.summary.feature"
+rankprofile[0].fef.property[01].value "fieldTermMatch(m1.key,0).occurrences"
+rankprofile[0].fef.property[02].name "vespa.summary.feature"
+rankprofile[0].fef.property[02].value "fieldLength(m1.key)"
+rankprofile[0].fef.property[03].name "vespa.summary.feature"
+rankprofile[0].fef.property[03].value "fieldTermMatch(m1.value,0).firstPosition"
+rankprofile[0].fef.property[04].name "vespa.summary.feature"
+rankprofile[0].fef.property[04].value "fieldTermMatch(m1.value,0).occurrences"
+rankprofile[0].fef.property[05].name "vespa.summary.feature"
+rankprofile[0].fef.property[05].value "fieldLength(m1.value)"
+rankprofile[0].fef.property[06].name "vespa.summary.feature"
+rankprofile[0].fef.property[06].value "fieldTermMatch(m2.value.a,0).firstPosition"
+rankprofile[0].fef.property[07].name "vespa.summary.feature"
+rankprofile[0].fef.property[07].value "fieldTermMatch(m2.value.a,0).occurrences"
+rankprofile[0].fef.property[08].name "vespa.summary.feature"
+rankprofile[0].fef.property[08].value "fieldLength(m2.value.a)"
+rankprofile[0].fef.property[09].name "vespa.summary.feature"
+rankprofile[0].fef.property[09].value "fieldTermMatch(m3.value,0).firstPosition"
+rankprofile[0].fef.property[10].name "vespa.summary.feature"
+rankprofile[0].fef.property[10].value "fieldTermMatch(m3.value,0).occurrences"
+rankprofile[0].fef.property[11].name "vespa.summary.feature"
+rankprofile[0].fef.property[11].value "fieldLength(m3.value)"
+rankprofile[0].fef.property[12].name "vespa.summary.feature"
+rankprofile[0].fef.property[12].value "fieldTermMatch(m4.value.value,0).firstPosition"
+rankprofile[0].fef.property[13].name "vespa.summary.feature"
+rankprofile[0].fef.property[13].value "fieldTermMatch(m4.value.value,0).occurrences"
+rankprofile[0].fef.property[14].name "vespa.summary.feature"
+rankprofile[0].fef.property[14].value "fieldLength(m4.value.value)"
+rankprofile[1].name "unranked"
+rankprofile[1].fef.property[4]
+rankprofile[1].fef.property[0].name "vespa.rank.firstphase"
+rankprofile[1].fef.property[0].value "value(0)"
+rankprofile[1].fef.property[1].name "vespa.hitcollector.heapsize"
+rankprofile[1].fef.property[1].value "0"
+rankprofile[1].fef.property[2].name "vespa.hitcollector.arraysize"
+rankprofile[1].fef.property[2].value "0"
+rankprofile[1].fef.property[3].name "vespa.dump.ignoredefaultfeatures"
+rankprofile[1].fef.property[3].value "true"
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.simple.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.simple.cfg
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/rank-profiles.simple.cfg
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/summary.aaa.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/summary.aaa.cfg
new file mode 100644
index 00000000000..8cc32ed5ea4
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/summary.aaa.cfg
@@ -0,0 +1,30 @@
+defaultsummaryid 197963550
+classes[2]
+classes[0].id 190911431
+classes[0].name "attributeprefetch"
+classes[0].fields[3]
+classes[0].fields[0].name "name"
+classes[0].fields[0].type "longstring"
+classes[0].fields[1].name "rankfeatures"
+classes[0].fields[1].type "longstring"
+classes[0].fields[2].name "summaryfeatures"
+classes[0].fields[2].type "longstring"
+classes[1].id 197963550
+classes[1].name "maptest"
+classes[1].fields[8]
+classes[1].fields[0].name "documentid"
+classes[1].fields[0].type "longstring"
+classes[1].fields[1].name "m1"
+classes[1].fields[1].type "jsonstring"
+classes[1].fields[2].name "m2"
+classes[1].fields[2].type "jsonstring"
+classes[1].fields[3].name "m3"
+classes[1].fields[3].type "jsonstring"
+classes[1].fields[4].name "m4"
+classes[1].fields[4].type "jsonstring"
+classes[1].fields[5].name "name"
+classes[1].fields[5].type "longstring"
+classes[1].fields[6].name "rankfeatures"
+classes[1].fields[6].type "longstring"
+classes[1].fields[7].name "summaryfeatures"
+classes[1].fields[7].type "longstring"
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/summary.simple.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/summary.simple.cfg
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/summary.simple.cfg
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/summarymap.aaa.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/summarymap.aaa.cfg
new file mode 100644
index 00000000000..1c9567431ae
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/summarymap.aaa.cfg
@@ -0,0 +1,11 @@
+defaultoutputclass -1
+override[3]
+override[0].arguments "name"
+override[0].command "attribute"
+override[0].field "name"
+override[1].arguments ""
+override[1].command "rankfeatures"
+override[1].field "rankfeatures"
+override[2].arguments ""
+override[2].command "summaryfeatures"
+override[2].field "summaryfeatures"
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/summarymap.simple.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/summarymap.simple.cfg
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/summarymap.simple.cfg
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.aaa.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.aaa.cfg
new file mode 100644
index 00000000000..4d976764bd9
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.aaa.cfg
@@ -0,0 +1,98 @@
+documentverificationlevel 0
+searchall 1
+documenttype[1]
+documenttype[0].name "maptest"
+documenttype[0].index[17]
+documenttype[0].index[00].name "m1"
+documenttype[0].index[00].field[2]
+documenttype[0].index[00].field[0].name "m1.key"
+documenttype[0].index[00].field[1].name "m1.value"
+documenttype[0].index[01].name "m1.key"
+documenttype[0].index[01].field[1]
+documenttype[0].index[01].field[0].name "m1.key"
+documenttype[0].index[02].name "m1.value"
+documenttype[0].index[02].field[1]
+documenttype[0].index[02].field[0].name "m1.value"
+documenttype[0].index[03].name "m2"
+documenttype[0].index[03].field[3]
+documenttype[0].index[03].field[0].name "m2.key"
+documenttype[0].index[03].field[1].name "m2.value.a"
+documenttype[0].index[03].field[2].name "m2.value.b"
+documenttype[0].index[04].name "m2.key"
+documenttype[0].index[04].field[1]
+documenttype[0].index[04].field[0].name "m2.key"
+documenttype[0].index[05].name "m2.value"
+documenttype[0].index[05].field[2]
+documenttype[0].index[05].field[0].name "m2.value.a"
+documenttype[0].index[05].field[1].name "m2.value.b"
+documenttype[0].index[06].name "m2.value.a"
+documenttype[0].index[06].field[1]
+documenttype[0].index[06].field[0].name "m2.value.a"
+documenttype[0].index[07].name "m2.value.b"
+documenttype[0].index[07].field[1]
+documenttype[0].index[07].field[0].name "m2.value.b"
+documenttype[0].index[08].name "m3"
+documenttype[0].index[08].field[2]
+documenttype[0].index[08].field[0].name "m3.key"
+documenttype[0].index[08].field[1].name "m3.value"
+documenttype[0].index[09].name "m3.key"
+documenttype[0].index[09].field[1]
+documenttype[0].index[09].field[0].name "m3.key"
+documenttype[0].index[10].name "m3.value"
+documenttype[0].index[10].field[1]
+documenttype[0].index[10].field[0].name "m3.value"
+documenttype[0].index[11].name "m4"
+documenttype[0].index[11].field[3]
+documenttype[0].index[11].field[0].name "m4.key"
+documenttype[0].index[11].field[1].name "m4.value.key"
+documenttype[0].index[11].field[2].name "m4.value.value"
+documenttype[0].index[12].name "m4.key"
+documenttype[0].index[12].field[1]
+documenttype[0].index[12].field[0].name "m4.key"
+documenttype[0].index[13].name "m4.value"
+documenttype[0].index[13].field[2]
+documenttype[0].index[13].field[0].name "m4.value.key"
+documenttype[0].index[13].field[1].name "m4.value.value"
+documenttype[0].index[14].name "m4.value.key"
+documenttype[0].index[14].field[1]
+documenttype[0].index[14].field[0].name "m4.value.key"
+documenttype[0].index[15].name "m4.value.value"
+documenttype[0].index[15].field[1]
+documenttype[0].index[15].field[0].name "m4.value.value"
+documenttype[0].index[16].name "name"
+documenttype[0].index[16].field[1]
+documenttype[0].index[16].field[0].name "name"
+fieldspec[11]
+fieldspec[00].arg1 ""
+fieldspec[00].name "m1.key"
+fieldspec[00].searchmethod AUTOUTF8
+fieldspec[01].arg1 ""
+fieldspec[01].name "m1.value"
+fieldspec[01].searchmethod AUTOUTF8
+fieldspec[02].arg1 ""
+fieldspec[02].name "m2.key"
+fieldspec[02].searchmethod AUTOUTF8
+fieldspec[03].arg1 ""
+fieldspec[03].name "m2.value.a"
+fieldspec[03].searchmethod AUTOUTF8
+fieldspec[04].arg1 ""
+fieldspec[04].name "m2.value.b"
+fieldspec[04].searchmethod AUTOUTF8
+fieldspec[05].arg1 ""
+fieldspec[05].name "m3.key"
+fieldspec[05].searchmethod AUTOUTF8
+fieldspec[06].arg1 ""
+fieldspec[06].name "m3.value"
+fieldspec[06].searchmethod AUTOUTF8
+fieldspec[07].arg1 ""
+fieldspec[07].name "m4.key"
+fieldspec[07].searchmethod AUTOUTF8
+fieldspec[08].arg1 ""
+fieldspec[08].name "m4.value.key"
+fieldspec[08].searchmethod AUTOUTF8
+fieldspec[09].arg1 ""
+fieldspec[09].name "m4.value.value"
+fieldspec[09].searchmethod AUTOUTF8
+fieldspec[10].arg1 ""
+fieldspec[10].name "name"
+fieldspec[10].searchmethod AUTOUTF8
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.simple.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.simple.cfg
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/vsmfields.simple.cfg
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.aaa.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.aaa.cfg
new file mode 100644
index 00000000000..664778d40a8
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.aaa.cfg
@@ -0,0 +1,26 @@
+outputclass ""
+fieldmap[6]
+fieldmap[0].command NONE
+fieldmap[0].summary "m1"
+fieldmap[0].document[1]
+fieldmap[0].document[0].field "m1"
+fieldmap[1].command NONE
+fieldmap[1].summary "m2"
+fieldmap[1].document[3]
+fieldmap[1].document[0].field "m2.key"
+fieldmap[1].document[1].field "m2.value.a"
+fieldmap[1].document[2].field "m2.value.b"
+fieldmap[2].command NONE
+fieldmap[2].summary "m3"
+fieldmap[2].document[1]
+fieldmap[2].document[0].field "m3"
+fieldmap[3].command NONE
+fieldmap[3].summary "m4"
+fieldmap[3].document[1]
+fieldmap[3].document[0].field "m4"
+fieldmap[4].command NONE
+fieldmap[4].summary "rankfeatures"
+fieldmap[4].document[0]
+fieldmap[5].command NONE
+fieldmap[5].summary "summaryfeatures"
+fieldmap[5].document[0]
diff --git a/streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.simple.cfg b/streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.simple.cfg
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/cfg/vsmsummary.simple.cfg
diff --git a/streamingvisitors/src/tests/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/tests/searchvisitor/searchvisitor.cpp
new file mode 100644
index 00000000000..77df70ad256
--- /dev/null
+++ b/streamingvisitors/src/tests/searchvisitor/searchvisitor.cpp
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/searchvisitor/searchenvironment.h>
+#include <vespa/searchvisitor/searchvisitor.h>
+#include <vespa/storage/frameworkimpl/component/storagecomponentregisterimpl.h>
+#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h>
+
+using namespace search;
+using namespace search::query;
+using namespace document;
+
+namespace storage {
+
+class SearchVisitorTest : public vespalib::TestApp // Test application for SearchVisitor creation and SearchEnvironment lookups.
+{
+private:
+ framework::defaultimplementation::FakeClock _clock; // handed to the component register in the constructor
+ StorageComponentRegisterImpl _componentRegister; // receives node info, clock and document type repo in the constructor
+ std::unique_ptr<StorageComponent> _component; // created at the end of the constructor from _componentRegister
+ SearchEnvironment _env; // constructed with "dir:cfg" in the constructor's initializer list
+ void testSearchVisitor(); // builds query parameters and delegates to testCreateSearchVisitor
+ void testSearchEnvironment(); // checks the "simple" lookups on _env
+ void testCreateSearchVisitor(const vespalib::string & dir, const vdslib::Parameters & parameters); // helper: makes a visitor and feeds it documents
+ void testOnlyRequireWeakReadConsistency(); // checks the visitor's required read consistency
+
+public:
+ SearchVisitorTest();
+ int Main(); // runs all test cases
+};
+
+SearchVisitorTest::SearchVisitorTest() : // Builds the storage component fixture shared by the test cases.
+ vespalib::TestApp(),
+ _componentRegister(),
+ _env("dir:cfg")
+{
+ _componentRegister.setNodeInfo("mycluster", lib::NodeType::STORAGE, 1);
+ _componentRegister.setClock(_clock);
+ StorageComponent::DocumentTypeRepoSP repo(new DocumentTypeRepo(readDocumenttypesConfig("cfg/documenttypes.cfg"))); // document types come from the test's own cfg directory
+ _componentRegister.setDocumentTypeRepo(repo);
+ _component.reset(new StorageComponent(_componentRegister, "storage")); // created last, after the register has been given node info, clock and repo
+}
+
+std::vector<spi::DocEntry::LP>
+createDocuments(const vespalib::string & dir) // Builds a one-element batch holding a default-constructed document.
+{
+ (void) dir; // dir is not used; the cast marks that as deliberate
+ std::vector<spi::DocEntry::LP> documents;
+ spi::Timestamp ts; // default-constructed timestamp
+ document::Document::UP doc(new document::Document());
+ spi::DocEntry::LP e(new spi::DocEntry(ts, 0, std::move(doc))); // doc is moved into the entry and must not be used afterwards
+ documents.push_back(e);
+ return documents;
+}
+
+void
+SearchVisitorTest::testCreateSearchVisitor(const vespalib::string & dir, const vdslib::Parameters & params) // Makes a visitor via the factory for dir/params and hands it the batch from createDocuments.
+{
+ SearchVisitorFactory sFactory(dir);
+ VisitorFactory & factory(sFactory); // call makeVisitor through the base-class interface
+ std::unique_ptr<Visitor> sv(factory.makeVisitor(*_component, _env, params)); // no downcast: the result is only ever used as a Visitor
+ document::OrderingSpecification orderSpec;
+ document::BucketId bucketId; // default-constructed bucket id
+ std::vector<spi::DocEntry::LP> documents(createDocuments(dir));
+ Visitor::HitCounter hitCounter(&orderSpec);
+ sv->handleDocuments(bucketId, documents, hitCounter); // nothing is asserted here; NOTE(review): presumably the test passes as long as this call completes — confirm
+}
+
+void
+SearchVisitorTest::testSearchEnvironment() // Checks that both "simple" lookups on the environment return something non-null.
+{
+ EXPECT_TRUE(_env.getVSMAdapter("simple") != NULL); // NOTE(review): "simple" presumably resolves to the (empty) cfg/*.simple.cfg files — confirm
+ EXPECT_TRUE(_env.getRankManager("simple") != NULL);
+}
+
+void
+SearchVisitorTest::testSearchVisitor() // Builds visitor parameters including a one-term query and runs them through testCreateSearchVisitor.
+{
+ vdslib::Parameters params;
+ params.set("searchcluster", "aaa"); // NOTE(review): presumably selects the cfg/*.aaa.cfg files — confirm
+ params.set("queryflags", "0x40000");
+ params.set("summarycount", "3");
+ params.set("summaryclass", "petra"); // NOTE(review): cfg/summary.aaa.cfg only names "attributeprefetch" and "maptest" — confirm "petra" is intentional
+ params.set("rankprofile", "default"); // cfg/rank-profiles.aaa.cfg names a profile "default"
+
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addStringTerm("maptest", "sddocname", 0, Weight(0)); // the query tree consists of this single string term
+ Node::UP node = builder.build();
+ vespalib::string stackDump = StackDumpCreator::create(*node); // string form of the query tree, passed on as the "query" parameter
+
+ params.set("query", stackDump);
+ testCreateSearchVisitor("dir:cfg", params);
+}
+
+void
+SearchVisitorTest::testOnlyRequireWeakReadConsistency() // Verifies that a visitor made by the factory reports WEAK as its required read consistency.
+{
+ SearchVisitorFactory factory("dir:cfg");
+ VisitorFactory& factoryBase(factory); // call makeVisitor through the base-class interface
+ vdslib::Parameters params; // default-constructed; nothing is set on it
+ std::unique_ptr<Visitor> sv(
+ factoryBase.makeVisitor(*_component, _env, params));
+ EXPECT_TRUE(sv->getRequiredReadConsistency() == spi::ReadConsistency::WEAK);
+}
+
+int
+SearchVisitorTest::Main() // Test application entry point: runs the three test cases in sequence.
+{
+ TEST_INIT("searchvisitor_test"); // NOTE(review): presumably registers the test under this name — confirm against the testkit
+
+ testSearchVisitor(); TEST_FLUSH(); // TEST_FLUSH follows every case; NOTE(review): presumably flushes pending test output — confirm
+ testSearchEnvironment(); TEST_FLUSH();
+ testOnlyRequireWeakReadConsistency(); TEST_FLUSH();
+
+ TEST_DONE(); // no explicit return follows; NOTE(review): presumably this macro returns the exit status — confirm
+}
+
+} // namespace storage
+
+TEST_APPHOOK(storage::SearchVisitorTest)
diff --git a/streamingvisitors/src/vespa/searchvisitor/.gitignore b/streamingvisitors/src/vespa/searchvisitor/.gitignore
new file mode 100644
index 00000000000..a01e73125b3
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/.gitignore
@@ -0,0 +1,9 @@
+*.So
+*.lo
+.*.swp
+.depend
+.depend.NEW
+.deps
+.libs
+Makefile
+/libsearchvisitor.so.5.1
diff --git a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
new file mode 100644
index 00000000000..f31bae302d1
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(streamingvisitors_searchvisitor
+ SOURCES
+ hitcollector.cpp
+ indexenvironment.cpp
+ queryenvironment.cpp
+ querytermdata.cpp
+ querywrapper.cpp
+ rankmanager.cpp
+ rankprocessor.cpp
+ searchenvironment.cpp
+ searchvisitor.cpp
+ INSTALL lib64
+ DEPENDS
+)
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
new file mode 100644
index 00000000000..10b752adf9e
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
@@ -0,0 +1,149 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchvisitor.hitcollector");
+#include "hitcollector.h"
+#include <stdexcept>
+
+using search::FeatureSet;
+using search::fef::MatchData;
+using vdslib::SearchResult;
+
+namespace storage {
+
+/**
+ * Creates an empty hit collector and reserves room for the wanted number of hits.
+ * addHit() compares the capacity of the hit vector with its size, so the
+ * reservation made here decides how many hits are kept.
+ *
+ * @param wantedHits the number of hits to reserve room for.
+ **/
+HitCollector::HitCollector(size_t wantedHits)
+    : _hits(),
+      _sortedByDocId(true)
+{
+    _hits.reserve(wantedHits);
+}
+
+/**
+ * Looks up the document stored with the hit that has the given local document id.
+ * The hit vector is scanned linearly from the front.
+ *
+ * @param docId local document id of the wanted hit.
+ * @return the document of the first hit with a matching document id.
+ * @throws std::runtime_error if no stored hit has the given document id.
+ **/
+const vsm::Document &
+HitCollector::getDocSum(const search::DocumentIdT & docId) const
+{
+    for (HitVector::const_iterator it(_hits.begin()), mt(_hits.end()); it != mt; ++it) {
+        if (docId == it->getDocId()) {
+            return *it->getDocument();
+        }
+    }
+    // The document id is unsigned (Hit stores it as uint32_t and the rest of this file prints
+    // it with %u); %d would show large ids as negative numbers.
+    throw std::runtime_error(vespalib::make_string("Could not look up document id %u", docId));
+}
+
+/**
+ * Wraps the document, match data and score in a Hit without sort data
+ * and forwards it to addHit(const Hit &).
+ *
+ * @return the result of addHit(const Hit &).
+ **/
+bool
+HitCollector::addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data, double score)
+{
+    return addHit(Hit(doc, data, score));
+}
+
+/**
+ * Wraps the document, match data, score and sort data in a Hit
+ * and forwards it to addHit(const Hit &).
+ *
+ * @return the result of addHit(const Hit &).
+ **/
+bool
+HitCollector::addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data,
+                     double score, const void * sortData, size_t sortDataLen)
+{
+    return addHit(Hit(doc, data, score, sortData, sortDataLen));
+}
+
+/**
+ * Makes sure the hit vector is ordered by document id.
+ * Sorting uses Hit::operator<, which compares document ids, and is skipped
+ * when the vector is already flagged as sorted.
+ **/
+void
+HitCollector::sortByDocId()
+{
+    if (_sortedByDocId) {
+        return; // nothing to do
+    }
+    std::sort(_hits.begin(), _hits.end()); // sort on docId
+    _sortedByDocId = true;
+}
+
+/**
+ * Tells whether the given hit is better than the current worst hit, which is the first
+ * element of the hit vector. Hits with a sort blob are compared on sort data, other
+ * hits on rank. The hit vector must not be empty when this is called.
+ *
+ * @param hit the candidate hit.
+ * @return true if the candidate should replace the current worst hit.
+ **/
+bool
+HitCollector::addHitToHeap(const Hit & hit) const
+{
+    const Hit & worst = _hits[0];
+    if (hit.getSortBlob().empty()) {
+        return hit.cmpRank(worst) < 0;
+    }
+    return hit.cmpSort(worst) < 0;
+}
+
+/**
+ * Adds the given hit if it belongs among the best hits seen so far.
+ *
+ * The number of hits to keep is given by the capacity of the hit vector. While more than one
+ * free slot remains the hit is simply appended. The hit that fills the last free slot turns
+ * the vector into a heap with the worst hit first. After that a new hit only replaces the
+ * current worst hit when addHitToHeap() says it is better. The comparator used for the heap
+ * operations is picked per call: SortComparator if the given hit has a sort blob,
+ * RankComparator otherwise.
+ *
+ * @param hit the hit to add.
+ * @return true if the hit was stored, false if it was dropped.
+ **/
+bool
+HitCollector::addHit(const Hit & hit)
+{
+    bool amongTheBest(false);
+    // number of free slots left before the reserved capacity is reached
+    ssize_t avail = (_hits.capacity() - _hits.size());
+    bool useSortBlob( ! hit.getSortBlob().empty() );
+    if (avail > 1) {
+        // No heap yet.
+        _hits.push_back(hit);
+        amongTheBest = true;
+    } else if (_hits.capacity() == 0) {
+        // this happens when wantedHitCount = 0
+        // in this case we shall not put anything on the heap (which is empty)
+    } else if ( avail == 0 && addHitToHeap(hit)) { // already a heap
+        // move the current worst hit to the back of the vector ...
+        if (useSortBlob) {
+            std::pop_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
+        } else {
+            std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
+        }
+
+        // ... overwrite it with the new hit ...
+        _hits.back() = hit;
+        amongTheBest = true;
+
+        // ... and restore the heap property
+        if (useSortBlob) {
+            std::push_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
+        } else {
+            std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
+        }
+    } else if (avail == 1) { // make a heap of the hit vector
+        _hits.push_back(hit);
+        amongTheBest = true;
+        if (useSortBlob) {
+            std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
+        } else {
+            std::make_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
+        }
+        _sortedByDocId = false; // the hit vector is no longer sorted by docId
+    }
+    // a full heap and a hit that is not better than the worst one ends up here untouched
+    return amongTheBest;
+}
+
+/**
+ * Copies all stored hits into the given search result, ordered by document id.
+ * Each hit is added with its local document id, the string form of its document id
+ * and its rank; hits carrying a sort blob also pass the blob along.
+ *
+ * @param searchResult the search result to add the hits to.
+ **/
+void
+HitCollector::fillSearchResult(vdslib::SearchResult & searchResult)
+{
+    sortByDocId();
+    for (const Hit & hit : _hits) {
+        vespalib::string documentId(hit.getDocument()->docDoc().getId().toString());
+        search::DocumentIdT docId = hit.getDocId();
+        SearchResult::RankType rank = hit.getRankScore();
+        const vespalib::string & sortBlob = hit.getSortBlob();
+
+        LOG(debug, "fillSearchResult: gDocId(%s), lDocId(%u), rank(%f)", documentId.c_str(), docId, static_cast<float>(rank));
+
+        if (sortBlob.empty()) {
+            searchResult.addHit(docId, documentId.c_str(), rank);
+        } else {
+            searchResult.addHit(docId, documentId.c_str(), rank, sortBlob.c_str(), sortBlob.size());
+        }
+    }
+}
+
+/**
+ * Runs the rank program for every stored hit, in document id order, and collects
+ * the value of each requested feature.
+ *
+ * @param rankProgram the rank program that is run once per hit.
+ * @param names names of the features; decides the number of values stored per hit.
+ * @param handles handles used to resolve the feature values; indexed in step with names.
+ * @return a feature set with one entry per hit, or an empty feature set if there are
+ *         no names or no hits.
+ **/
+FeatureSet::SP
+HitCollector::getFeatureSet(IRankProgram &rankProgram,
+                            const std::vector<vespalib::string> & names,
+                            const std::vector<search::fef::FeatureHandle> & handles)
+{
+    if (names.empty() || _hits.empty()) {
+        return FeatureSet::SP(new FeatureSet());
+    }
+    sortByDocId();
+    FeatureSet::SP featureSet(new FeatureSet(names, _hits.size()));
+    for (const Hit & hit : _hits) {
+        const MatchData & matchData = rankProgram.run(hit.getDocId(), hit.getMatchData());
+        uint32_t docId = matchData.getDocId();
+        search::feature_t * values = featureSet->getFeaturesByIndex(featureSet->addDocId(docId));
+        for (uint32_t j = 0; j < names.size(); ++j) {
+            values[j] = *matchData.resolveFeature(handles[j]);
+            LOG(debug, "getFeatureSet: lDocId(%u), '%s': %f", docId, names[j].c_str(), values[j]);
+        }
+    }
+    return featureSet;
+}
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
new file mode 100644
index 00000000000..11c799f1c06
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
@@ -0,0 +1,145 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/featureset.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/vdslib/container/searchresult.h>
+#include <vespa/vsm/common/docsum.h>
+#include <vespa/vsm/common/storagedocument.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace storage {
+
+/**
+ * This class is used to store hits and MatchData objects for the m best hits.
+ **/
+class HitCollector : public vsm::IDocSumCache
+{
+private:
+    /**
+     * A single stored hit: the document, a copy of its term field match data,
+     * its rank score and an optional sort blob.
+     **/
+    class Hit
+    {
+    public:
+        /**
+         * Creates a hit, copying the term field match data of all handles in the given
+         * match data and the given sort data. The document id is taken from the match data.
+         **/
+        Hit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & matchData,
+            double score, const void * sortData, size_t sortDataLen) :
+            _docid(matchData.getDocId()),
+            _score(score),
+            _document(doc),
+            _matchData(),
+            _sortBlob(sortData, sortDataLen)
+        {
+            _matchData.reserve(matchData.getNumTermFields());
+            for (search::fef::TermFieldHandle handle = 0; handle < matchData.getNumTermFields(); ++handle) {
+                _matchData.emplace_back(*matchData.resolveTermField(handle));
+            }
+        }
+        /** Creates a hit without sort data. **/
+        Hit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & matchData, double score)
+            : Hit(doc, matchData, score, nullptr, 0) {}
+        search::DocumentIdT getDocId() const { return _docid; }
+        const vsm::StorageDocument::SP & getDocument() const { return _document; }
+        const std::vector<search::fef::TermFieldMatchData> &getMatchData() const { return _matchData; }
+        search::feature_t getRankScore() const { return _score; }
+        const vespalib::string & getSortBlob() const { return _sortBlob; }
+        /** Orders hits by document id. **/
+        bool operator < (const Hit & b) const { return getDocId() < b.getDocId(); }
+        /**
+         * Three-way comparison on document id.
+         * The ids are unsigned, so they are compared instead of subtracted; converting the
+         * difference of two ids to int gives the wrong sign when the ids are more than
+         * INT_MAX apart.
+         *
+         * @return -1 if this id is lowest, 1 if it is highest and 0 if the ids are equal.
+         **/
+        int cmpDocId(const Hit & b) const {
+            return (getDocId() < b.getDocId()) ? -1 : ((getDocId() > b.getDocId()) ? 1 : 0);
+        }
+        /**
+         * Three-way comparison on rank score where the highest score sorts first.
+         * Equal scores are ordered by document id.
+         **/
+        int cmpRank(const Hit & b) const {
+            return (getRankScore() > b.getRankScore()) ?
+                -1 : ((getRankScore() < b.getRankScore()) ? 1 : cmpDocId(b));
+        }
+        /**
+         * Three-way comparison on sort blob. Equal blobs are ordered by document id.
+         **/
+        int cmpSort(const Hit & b) const {
+            int diff = _sortBlob.compare(b._sortBlob.c_str(), b._sortBlob.size());
+            return (diff == 0) ? cmpDocId(b) : diff;
+        }
+        /** Strict ordering based on cmpRank(). **/
+        class RankComparator {
+        public:
+            RankComparator() {}
+            bool operator() (const Hit & lhs, const Hit & rhs) const {
+                return lhs.cmpRank(rhs) < 0;
+            }
+        };
+        /** Strict ordering based on cmpSort(). **/
+        class SortComparator {
+        public:
+            SortComparator() {}
+            bool operator() (const Hit & lhs, const Hit & rhs) const {
+                return lhs.cmpSort(rhs) < 0;
+            }
+        };
+
+    private:
+        uint32_t _docid;
+        double _score;
+        vsm::StorageDocument::SP _document;
+        std::vector<search::fef::TermFieldMatchData> _matchData;
+        vespalib::string _sortBlob;
+    };
+    typedef std::vector<Hit> HitVector;
+    HitVector _hits;
+    bool _sortedByDocId; // flag for whether the hit vector is sorted on docId
+
+    void sortByDocId();
+    bool addHitToHeap(const Hit & hit) const;
+    bool addHit(const Hit & hit);
+
+public:
+    typedef std::unique_ptr<HitCollector> UP;
+
+    /**
+     * Interface for running a rank program on the stored match data of a hit.
+     **/
+    struct IRankProgram {
+        virtual ~IRankProgram() {}
+        virtual const search::fef::MatchData &run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) = 0;
+    };
+
+    /**
+     * @param wantedHits the number of hits to keep.
+     **/
+    HitCollector(size_t wantedHits);
+
+    virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const;
+
+    /**
+     * Adds a hit to this hit collector.
+     * Make sure that the hits are added in increasing local docId order.
+     * If you add a NULL document you should not use getDocSum() or fillSearchResult(),
+     * as these functions expect valid documents.
+     *
+     * @param doc   The document that is a hit.
+     * @param data  The match data for the hit.
+     * @param score The rank score of the hit.
+     * @return true if the document was added to the heap
+     **/
+    bool addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data, double score);
+
+    /**
+     * Adds a hit to this hit collector.
+     * Make sure that the hits are added in increasing local docId order.
+     * If you add a NULL document you should not use getDocSum() or fillSearchResult(),
+     * as these functions expect valid documents.
+     *
+     * @param doc  The document that is a hit.
+     * @param data The match data for the hit.
+     * @param score The rank score of the hit.
+     * @param sortData The buffer of the sortdata.
+     * @param sortDataLen The length of the sortdata.
+     * @return true if the document was added to the heap
+     **/
+    bool addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data,
+                double score, const void * sortData, size_t sortDataLen);
+
+    /**
+     * Fills the given search result with the m best hits from the hit heap.
+     * Invoking this method will destroy the heap property of the hit heap.
+     **/
+    void fillSearchResult(vdslib::SearchResult & searchResult);
+
+    /**
+     * Extract features from the hits stored in the hit heap.
+     * Invoking this method will destroy the heap property of the hit heap.
+     * Note that this method will calculate any additional features.
+     *
+     * @return features for all hits on the heap.
+     * @param rankProgram the rank program used to calculate all features.
+     * @param names names of all features.
+     * @param handles handles of all features.
+     **/
+    search::FeatureSet::SP getFeatureSet(IRankProgram &rankProgram,
+                                         const std::vector<vespalib::string> & names,
+                                         const std::vector<search::fef::FeatureHandle> & handles);
+
+};
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
new file mode 100644
index 00000000000..1dc6a096f2e
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchvisitor.indexenvironment");
+#include "indexenvironment.h"
+
+using namespace search::fef;
+
+namespace storage {
+
+/**
+ * Creates an index environment without fields or properties.
+ * Only the address of the table manager is stored, so the table manager
+ * must outlive this object. The feature motivation starts out as RANK.
+ *
+ * @param tableManager the table manager handed out by getTableManager().
+ **/
+IndexEnvironment::IndexEnvironment(const ITableManager & tableManager)
+    : _tableManager(&tableManager),
+      _properties(),
+      _fields(),
+      _fieldNames(),
+      _motivation(RANK),
+      _rankAttributes(),
+      _dumpAttributes()
+{
+}
+
+/**
+ * Registers a new single-value field. The field gets the next free field id,
+ * which is the number of fields registered so far.
+ *
+ * @param name the name of the field.
+ * @param isAttribute whether the field is registered as ATTRIBUTE (true) or INDEX (false).
+ * @return false if a field with this name is already registered, true otherwise.
+ **/
+bool
+IndexEnvironment::addField(const vespalib::string & name, bool isAttribute)
+{
+    const bool alreadyKnown = (getFieldByName(name) != nullptr);
+    if (alreadyKnown) {
+        return false;
+    }
+    FieldInfo info(isAttribute ? FieldType::ATTRIBUTE : FieldType::INDEX, CollectionType::SINGLE, name, _fields.size());
+    info.addAttribute(); // we are able to produce needed attributes at query time
+    _fields.push_back(info);
+    _fieldNames[info.name()] = info.id();
+    return true;
+}
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
new file mode 100644
index 00000000000..5eabf0525e8
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/iindexenvironment.h>
+#include <vespa/searchlib/fef/itablemanager.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <set>
+
+namespace storage {
+
+/**
+ * Implementation of the feature execution framework
+ * index environment API for the search visitor.
+ *
+ * Fields are kept in a vector indexed by field id together with a map from field
+ * name to field id. Attribute names hinted through hintAttributeAccess() are
+ * collected in one of two sets depending on the current feature motivation.
+ **/
+class IndexEnvironment : public search::fef::IIndexEnvironment
+{
+private:
+    using StringInt32Map = vespalib::hash_map<vespalib::string, uint32_t>;
+
+    const search::fef::ITableManager * _tableManager;
+    search::fef::Properties _properties;
+    std::vector<search::fef::FieldInfo> _fields;
+    StringInt32Map _fieldNames;
+    // The members below are changed from const hint functions and are therefore mutable.
+    mutable FeatureMotivation _motivation;
+    mutable std::set<vespalib::string> _rankAttributes;
+    mutable std::set<vespalib::string> _dumpAttributes;
+
+public:
+    IndexEnvironment(const search::fef::ITableManager & tableManager);
+
+    // inherit documentation
+    virtual const search::fef::Properties & getProperties() const {
+        return _properties;
+    }
+
+    // inherit documentation
+    virtual uint32_t getNumFields() const {
+        return _fields.size();
+    }
+
+    // inherit documentation (returns NULL for an unknown field id)
+    virtual const search::fef::FieldInfo * getField(uint32_t id) const {
+        return (id < _fields.size()) ? &_fields[id] : nullptr;
+    }
+
+    // inherit documentation (returns NULL for an unknown field name)
+    virtual const search::fef::FieldInfo * getFieldByName(const string & name) const {
+        StringInt32Map::const_iterator found = _fieldNames.find(name);
+        return (found != _fieldNames.end()) ? getField(found->second) : nullptr;
+    }
+
+    // inherit documentation
+    virtual const search::fef::ITableManager & getTableManager() const {
+        return *_tableManager;
+    }
+
+    virtual FeatureMotivation getFeatureMotivation() const override {
+        return _motivation;
+    }
+
+    // inherit documentation
+    virtual void hintFeatureMotivation(FeatureMotivation motivation) const {
+        _motivation = motivation;
+    }
+
+    // inherit documentation
+    virtual void hintFieldAccess(uint32_t) const {}
+
+    // inherit documentation (empty names are ignored)
+    virtual void hintAttributeAccess(const string & name) const {
+        if (name.empty()) {
+            return;
+        }
+        std::set<vespalib::string> & hinted = (_motivation == RANK) ? _rankAttributes : _dumpAttributes;
+        hinted.insert(name);
+    }
+
+    /** Registers a field; see the definition for details. **/
+    bool addField(const vespalib::string & name, bool isAttribute);
+
+    /** Mutable access to the properties of this environment. **/
+    search::fef::Properties & getProperties() { return _properties; }
+
+    /** Attribute names hinted while the feature motivation was RANK. **/
+    const std::set<vespalib::string> & getHintedRankAttributes() const { return _rankAttributes; }
+
+    /** Attribute names hinted while the feature motivation was something other than RANK. **/
+    const std::set<vespalib::string> & getHintedDumpAttributes() const { return _dumpAttributes; }
+};
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp
new file mode 100644
index 00000000000..ca90df395fd
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchvisitor.queryenvironment");
+#include "queryenvironment.h"
+#include <vespa/searchlib/common/location.h>
+
+using search::IAttributeManager;
+using search::fef::Properties;
+using vespalib::string;
+
+namespace storage {
+
+namespace {
+
+/**
+ * Parses a location string on the form "<attribute>:<location spec>" into a fef location.
+ *
+ * @param location_str the string to parse; may be empty.
+ * @return a location marked valid with attribute, x/y position and x aspect set if parsing
+ *         succeeds. A default constructed location is returned if the string is empty, has
+ *         no ':' or has a location spec that cannot be parsed; the last two cases are
+ *         logged as warnings.
+ **/
+search::fef::Location parseLocation(const string & location_str)
+{
+    search::fef::Location result;
+    if (location_str.empty()) {
+        return result;
+    }
+
+    const string::size_type colon = location_str.find(':');
+    if (colon == string::npos) {
+        LOG(warning, "Location string lacks attribute vector specification. loc='%s'. Location ignored.",
+            location_str.c_str());
+        return result;
+    }
+    const string attribute = location_str.substr(0, colon);
+    const string location = location_str.substr(colon + 1);
+
+    search::common::Location parsed;
+    const bool parsedOk = parsed.parse(location);
+    if (!parsedOk) {
+        LOG(warning, "Location parse error (location: '%s'): %s. Location ignored.",
+            location.c_str(), parsed.getParseError());
+        return result;
+    }
+
+    result.setAttribute(attribute);
+    result.setXPosition(parsed.getX());
+    result.setYPosition(parsed.getY());
+    result.setXAspect(parsed.getXAspect());
+    result.setValid(true);
+    return result;
+}
+
+}
+
+/**
+ * Creates a query environment without query terms.
+ *
+ * @param location_str location string that is parsed by parseLocation().
+ * @param indexEnv the index environment; kept by reference.
+ * @param properties the query properties; kept by reference.
+ * @param attrMgr attribute manager used to create the attribute context. The declaration
+ *                of this constructor defaults it to NULL, so a NULL manager must not be
+ *                dereferenced; in that case no attribute context is created and
+ *                getAttributeContext() must not be called on the resulting object.
+ **/
+QueryEnvironment::QueryEnvironment(const string & location_str,
+                                   const IndexEnvironment & indexEnv,
+                                   const Properties & properties,
+                                   const IAttributeManager * attrMgr) :
+    _indexEnv(indexEnv),
+    _properties(properties),
+    _attrCtx(),
+    _queryTerms(),
+    _location(parseLocation(location_str))
+{
+    if (attrMgr != NULL) {
+        _attrCtx = attrMgr->createContext();
+    }
+}
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h
new file mode 100644
index 00000000000..48d0ef7645a
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/fef/iindexenvironment.h>
+#include <vespa/searchlib/fef/iqueryenvironment.h>
+#include <vespa/searchlib/fef/location.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "indexenvironment.h"
+
+namespace storage {
+
+/**
+ * Implementation of the feature execution framework
+ * query environment API for the search visitor.
+ *
+ * The index environment and the properties are kept by reference and the query terms
+ * as plain pointers, so all of them must outlive this object.
+ **/
+class QueryEnvironment : public search::fef::IQueryEnvironment
+{
+private:
+    const IndexEnvironment &_indexEnv;
+    const search::fef::Properties &_properties;
+    search::attribute::IAttributeContext::UP _attrCtx;
+    std::vector<const search::fef::ITermData *> _queryTerms;
+    search::fef::Location _location;
+
+public:
+    using UP = std::unique_ptr<QueryEnvironment>;
+
+    QueryEnvironment(const vespalib::string & location,
+                     const IndexEnvironment & indexEnv,
+                     const search::fef::Properties & properties,
+                     const search::IAttributeManager * attrMgr = nullptr);
+
+    // inherit documentation
+    virtual const search::fef::Properties & getProperties() const {
+        return _properties;
+    }
+
+    // inherit documentation
+    virtual uint32_t getNumTerms() const {
+        return _queryTerms.size();
+    }
+
+    // inherit documentation (returns NULL if idx is out of range)
+    virtual const search::fef::ITermData *getTerm(uint32_t idx) const {
+        return (idx < _queryTerms.size()) ? _queryTerms[idx] : nullptr;
+    }
+
+    // inherit documentation
+    virtual const search::fef::Location & getLocation() const {
+        return _location;
+    }
+
+    // inherit documentation (dereferences the attribute context unconditionally)
+    virtual const search::attribute::IAttributeContext & getAttributeContext() const {
+        return *_attrCtx;
+    }
+
+    // inherit documentation
+    virtual const search::fef::IIndexEnvironment & getIndexEnvironment() const {
+        return _indexEnv;
+    }
+
+    /** Appends a query term; only the pointer is stored. **/
+    void addTerm(const search::fef::ITermData *term) {
+        _queryTerms.push_back(term);
+    }
+};
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.cpp b/streamingvisitors/src/vespa/searchvisitor/querytermdata.cpp
new file mode 100644
index 00000000000..7b2bcd1e4d5
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.cpp
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchvisitor.querytermdata");
+#include "querytermdata.h"
+
+using namespace search::fef;
+
+namespace storage {
+
+// Counterpart of the DUPLICATE(QueryTermData) declaration in querytermdata.h; presumably expands
+// to the definition of the duplicate function declared there -- confirm against the macro.
+IMPLEMENT_DUPLICATE(QueryTermData);
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
new file mode 100644
index 00000000000..ef6d96e6e39
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/simpletermdata.h>
+#include <vespa/searchlib/query/querynoderesultbase.h>
+
+namespace storage {
+
+/**
+ * This class keeps data for a query term that is used by the ranking framework.
+ * The term data itself is held in a SimpleTermData member.
+ **/
+class QueryTermData : public search::QueryNodeResultBase
+{
+private:
+    search::fef::SimpleTermData _termData;
+
+public:
+    DUPLICATE(QueryTermData); // create duplicate function
+
+    /** Always evaluates to true. **/
+    virtual bool evaluate() const {
+        return true;
+    }
+
+    /** Nothing to reset. **/
+    virtual void reset() {
+    }
+
+    /** Always returns true. **/
+    virtual bool getRewriteFloatTerms() const {
+        return true;
+    }
+
+    /** Mutable access to the term data used by the ranking framework. **/
+    search::fef::SimpleTermData &getTermData() {
+        return _termData;
+    }
+};
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp b/streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp
new file mode 100644
index 00000000000..f375b532839
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchvisitor.querywrapper");
+#include "querywrapper.h"
+
+using namespace search;
+
+namespace storage {
+
+/**
+ * Collects all phrase nodes of the given query.
+ * The nodes returned by Query::getPhrases() are stored as PhraseQueryNode pointers.
+ *
+ * @param query the query to collect phrases from.
+ **/
+QueryWrapper::PhraseList::PhraseList(Query & query)
+    : _phrases()
+{
+    QueryNodeRefList phraseNodes;
+    query.getPhrases(phraseNodes);
+    for (size_t idx = 0; idx < phraseNodes.size(); ++idx) {
+        PhraseQueryNode * phrase = static_cast<PhraseQueryNode *>(phraseNodes[idx]);
+        _phrases.push_back(phrase);
+    }
+}
+
+/**
+ * Searches the collected phrases for the given term.
+ *
+ * @param term the term to look for; compared by pointer.
+ * @param index set to the position of the term inside the phrase when found,
+ *              otherwise left untouched.
+ * @return the first phrase containing the term, or NULL if no phrase contains it.
+ **/
+PhraseQueryNode *
+QueryWrapper::PhraseList::findPhrase(QueryTerm * term, size_t & index)
+{
+    for (PhraseQueryNode * phrase : _phrases) {
+        for (size_t pos = 0; pos < phrase->size(); ++pos) {
+            if ((*phrase)[pos].get() == term) {
+                index = pos;
+                return phrase;
+            }
+        }
+    }
+    return nullptr;
+}
+
+/**
+ * Wraps the given query. Every leaf term of the query gets an entry in the term list
+ * holding the term, the phrase it is part of (NULL if none) and its position inside
+ * that phrase (0 if it is not part of a phrase).
+ *
+ * @param query the query to wrap.
+ **/
+QueryWrapper::QueryWrapper(Query & query)
+    : _phraseList(query),
+      _termList()
+{
+    QueryTermList leafTerms;
+    query.getLeafs(leafTerms);
+    for (size_t idx = 0; idx < leafTerms.size(); ++idx) {
+        size_t posInPhrase = 0;
+        PhraseQueryNode * phrase = _phraseList.findPhrase(leafTerms[idx], posInPhrase);
+        _termList.emplace_back(leafTerms[idx], phrase, posInPhrase);
+    }
+}
+
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h
new file mode 100644
index 00000000000..beeda493197
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/query/querynode.h>
+
+namespace storage {
+
+/**
+ * This class wraps a query and adds extra information to the list of leaf terms.
+ **/
+class QueryWrapper
+{
+public:
+    /**
+     * The phrase nodes of a query, with lookup of the phrase a given term is part of.
+     **/
+    class PhraseList {
+    private:
+        std::vector<search::PhraseQueryNode *> _phrases;
+
+    public:
+        PhraseList(search::Query & query);
+        search::PhraseQueryNode * findPhrase(search::QueryTerm * term, size_t & index);
+    };
+
+    /**
+     * A leaf term together with the phrase it is part of (if any)
+     * and its position inside that phrase.
+     **/
+    class Term {
+    private:
+        search::QueryTerm * _term;
+        search::PhraseQueryNode * _parent;
+        size_t _index;
+
+    public:
+        Term()
+            : _term(nullptr),
+              _parent(nullptr),
+              _index(0)
+        {
+        }
+        Term(search::QueryTerm * term, search::PhraseQueryNode * parent, size_t index)
+            : _term(term),
+              _parent(parent),
+              _index(index)
+        {
+        }
+        search::QueryTerm * getTerm() { return _term; }
+        search::PhraseQueryNode * getParent() { return _parent; }
+        size_t getIndex() const { return _index; }
+        /** A term is a phrase term when it has a parent phrase. **/
+        bool isPhraseTerm() const { return _parent != nullptr; }
+        bool isFirstPhraseTerm() const { return isPhraseTerm() && getIndex() == 0; }
+        /** The width of the parent phrase minus one, or 0 without a parent phrase. **/
+        size_t getPosAdjust() const {
+            return (_parent != nullptr) ? _parent->width() - 1 : 0;
+        }
+    };
+
+    using TermList = std::vector<Term>;
+
+private:
+    PhraseList _phraseList;
+    TermList _termList;
+
+public:
+    QueryWrapper(search::Query & query);
+    TermList & getTermList() { return _termList; }
+    const TermList & getTermList() const { return _termList; }
+};
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
new file mode 100644
index 00000000000..b638b072d1d
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
@@ -0,0 +1,200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchvisitor.rankmanager");
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/functiontablefactory.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include "rankmanager.h"
+
+using vespa::config::search::RankProfilesConfig;
+using vespa::config::search::vsm::VsmfieldsConfig;
+using search::fef::Blueprint;
+using search::fef::BlueprintFactory;
+using search::fef::FieldInfo;
+using search::fef::Properties;
+using search::fef::RankSetup;
+using vsm::VsmfieldsHandle;
+using vsm::VSMAdapter;
+
+namespace storage {
+
+void
+RankManager::Snapshot::addProperties(const vespa::config::search::RankProfilesConfig & cfg)
+{
+ for (uint32_t i = 0; i < cfg.rankprofile.size(); ++i) {
+ const RankProfilesConfig::Rankprofile & curr = cfg.rankprofile[i];
+ _properties.push_back(NamedPropertySet());
+ _properties.back().first = curr.name;
+ Properties & p = _properties.back().second;
+ for (uint32_t j = 0; j < curr.fef.property.size(); ++j) {
+ p.add(vespalib::string(curr.fef.property[j].name.c_str()),
+ vespalib::string(curr.fef.property[j].value.c_str()));
+ }
+ }
+}
+
+/**
+ * Registers every field spec of the vsm fields config in the prototype index environment.
+ * The index environment hands out field ids in registration order, and these ids are
+ * expected to be equal to the position of the field spec in the config.
+ *
+ * @param fields handle to the vsm fields config.
+ **/
+void
+RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields)
+{
+    for (uint32_t i = 0; i < fields->fieldspec.size(); ++i) {
+        const VsmfieldsConfig::Fieldspec & fs = fields->fieldspec[i];
+        bool isAttribute = (fs.fieldtype == VsmfieldsConfig::Fieldspec::ATTRIBUTE);
+        LOG(debug, "Adding field of type '%s' and name '%s' with id '%u' the index environment.",
+            isAttribute ? "ATTRIBUTE" : "INDEX", fs.name.c_str(), i);
+        // This id must match the vsm specific field id
+        if (!_protoEnv.addField(fs.name, isAttribute)) {
+            // A rejected field does not consume a field id, so every field registered after it
+            // gets an id that is lower than its position in the config. Make that visible.
+            LOG(warning, "Field '%s' (id '%u') is already registered in the index environment. "
+                "Field ids of subsequent fields will not match their position in the fields config.",
+                fs.name.c_str(), i);
+        }
+    }
+}
+
+/**
+ * Builds a view, i.e. the sorted list of field ids, for each index of each document type
+ * in the vsm fields config. Fields that are not known by the prototype index environment
+ * are left out with a warning. Only the first view seen for an index name is kept.
+ *
+ * @param fields handle to the vsm fields config.
+ **/
+void
+RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields)
+{
+    for (uint32_t docIdx = 0; docIdx < fields->documenttype.size(); ++docIdx) {
+        const auto & docType = fields->documenttype[docIdx];
+        const char * dname = docType.name.c_str();
+        LOG(debug, "Looking through indexes for documenttype '%s'", dname);
+        for (uint32_t indexIdx = 0; indexIdx < docType.index.size(); ++indexIdx) {
+            const auto & index = docType.index[indexIdx];
+            const char * iname = index.name.c_str();
+            LOG(debug, "Looking through fields for index '%s'", iname);
+            View view;
+            for (uint32_t fieldIdx = 0; fieldIdx < index.field.size(); ++fieldIdx) {
+                const char * fname = index.field[fieldIdx].name.c_str();
+                const FieldInfo * info = _protoEnv.getFieldByName(vespalib::string(fname));
+                if (info == NULL) {
+                    LOG(warning, "Field '%s' is not registred in the index environment. "
+                        "Cannot add to index view.", fname);
+                    continue;
+                }
+                LOG(debug, "Adding field '%s' to view in index '%s' (field id '%u')",
+                    fname, iname, info->id());
+                view.push_back(info->id());
+            }
+            if (_views.find(iname) != _views.end()) {
+                LOG(warning, "We already have a view for index '%s'. Drop the new view.", iname);
+                continue;
+            }
+            std::sort(view.begin(), view.end()); // lowest field id first
+            _views[iname] = view;
+        }
+    }
+}
+
+/**
+ * Creates an index environment and a compiled rank setup for each property set (rank profile)
+ * and fills the map from rank profile key to rank profile index.
+ *
+ * @param factory the blueprint factory used by the rank setups.
+ * @return false as soon as one rank setup fails to compile (the snapshot is then left
+ *         partly initialized), true otherwise.
+ **/
+bool
+RankManager::Snapshot::initRankSetup(const BlueprintFactory & factory)
+{
+    // set up individual index environments per rank profile
+    for (uint32_t i = 0; i < _properties.size(); ++i) {
+        // start from a copy of the prototype and add the properties of this rank profile
+        _indexEnv.push_back(_protoEnv);
+        IndexEnvironment & ie = _indexEnv.back();
+        ie.getProperties().import(_properties[i].second);
+    }
+
+    // set up individual rank setups per rank profile
+    for (uint32_t i = 0; i < _indexEnv.size(); ++i) {
+        IndexEnvironment & ie = _indexEnv[i];
+
+        RankSetup::SP rs(new RankSetup(factory, ie));
+        rs->configure(); // reads config values from the property map
+        if (!rs->compile()) {
+            LOG(warning, "Could not compile rank setup for rank profile '%u'.", i);
+            return false;
+        }
+        _rankSetup.push_back(rs);
+    }
+    LOG_ASSERT(_indexEnv.size() == _rankSetup.size());
+    LOG(debug, "Number of index environments and rank setups: %u", (uint32_t)_indexEnv.size());
+    LOG_ASSERT(_properties.size() == _rankSetup.size());
+    // Rank profiles can be looked up both by index (as a decimal string) and by name.
+    // All index keys are inserted before any name key, so a rank profile name that looks
+    // like a number takes precedence over the index key with the same text.
+    for (uint32_t i = 0; i < _properties.size(); ++i) {
+        vespalib::string number = vespalib::make_vespa_string("%u", i);
+        _rpmap[number] = i;
+    }
+    for (uint32_t i = 0; i < _properties.size(); ++i) {
+        const vespalib::string &name = _properties[i].first;
+        _rpmap[name] = i;
+    }
+    return true;
+}
+
+/**
+ * Creates an empty snapshot. The prototype index environment is set up with the table
+ * manager of this snapshot, and a function table factory is added to the table manager.
+ **/
+RankManager::Snapshot::Snapshot()
+    : _tableManager(),
+      _protoEnv(_tableManager),
+      _properties(),
+      _indexEnv(),
+      _rankSetup(),
+      _rpmap(),
+      _views()
+{
+    search::fef::ITableFactory::SP functionTables(new search::fef::FunctionTableFactory(256));
+    _tableManager.addFactory(functionTables);
+}
+
+/**
+ * Sets up this snapshot with the given property sets replacing the current ones.
+ *
+ * @param rm the rank manager owning the vsm adapter and blueprint factory to use.
+ * @param properties the named property sets, one per rank profile.
+ * @return the result of setup(rm).
+ **/
+bool
+RankManager::Snapshot::setup(const RankManager & rm, const std::vector<NamedPropertySet> & properties)
+{
+    _properties = properties;
+    const bool ok = setup(rm);
+    return ok;
+}
+
+/**
+ * Sets up this snapshot from the fields config of the rank manager's vsm adapter:
+ * registers the fields, builds the index views and initializes the rank setups.
+ *
+ * @param rm the rank manager owning the vsm adapter and blueprint factory to use.
+ * @return whether the rank setups were successfully initialized.
+ **/
+bool
+RankManager::Snapshot::setup(const RankManager & rm)
+{
+    VsmfieldsHandle fieldsConfig = rm._vsmAdapter->getFieldsConfig();
+    detectFields(fieldsConfig);
+    buildFieldMappings(fieldsConfig);
+    return initRankSetup(rm._blueprintFactory);
+}
+
+/**
+ * Sets up this snapshot with the property sets of the given rank-profiles config
+ * added to the current ones.
+ *
+ * @param rm the rank manager owning the vsm adapter and blueprint factory to use.
+ * @param cfg the rank-profiles config.
+ * @return the result of setup(rm).
+ **/
+bool
+RankManager::Snapshot::setup(const RankManager & rm, const RankProfilesConfig & cfg)
+{
+    addProperties(cfg);
+    const bool ok = setup(rm);
+    return ok;
+}
+
+/**
+ * Reconfigures the rank profiles from the rank-profiles config of the given config snapshot.
+ *
+ * @param snap the config snapshot to fetch the rank-profiles config from.
+ **/
+void RankManager::notify(const vsm::VSMConfigSnapshot & snap)
+{
+    const auto & rankProfiles = snap.getConfig<RankProfilesConfig>();
+    configureRankProfiles(*rankProfiles);
+}
+
+
+/**
+ * Builds a new snapshot from the given rank-profiles config and makes it the current one.
+ * The current snapshot is left untouched if the new one cannot be set up.
+ *
+ * @param cfg the rank-profiles config.
+ * @throws vespalib::Exception if the new snapshot cannot be set up.
+ **/
+void
+RankManager::configureRankProfiles(const RankProfilesConfig & cfg)
+{
+    // size() is unsigned (size_t), which takes %zu; %zd is the signed counterpart
+    LOG(debug, "configureRankProfiles(): Size of cfg rankprofiles: %zu", cfg.rankprofile.size());
+
+    std::unique_ptr<Snapshot> snapshot(new Snapshot());
+    if (snapshot->setup(*this, cfg)) {
+        // ownership of the snapshot is handed over
+        _snapshot.set(snapshot.release());
+        _snapshot.latch(); // switch to the new config object
+    } else {
+        vespalib::string msg = "(re-)configuration of rank manager failed";
+        LOG(error, "%s", msg.c_str());
+        throw vespalib::Exception(msg, VESPA_STRLOC);
+    }
+}
+
+/**
+ * Creates a rank manager without any snapshot and registers the search features
+ * in its blueprint factory.
+ *
+ * @param vsmAdapter the vsm adapter; only the pointer is stored.
+ **/
+RankManager::RankManager(VSMAdapter * const vsmAdapter)
+    : _blueprintFactory(),
+      _snapshot(),
+      _vsmAdapter(vsmAdapter)
+{
+    // init blueprint factory
+    search::features::setup_search_features(_blueprintFactory);
+}
+
+/** Nothing to clean up beyond what the members do themselves. **/
+RankManager::~RankManager() { }
+
+/**
+ * Configures this rank manager from the given config snapshot by forwarding to notify().
+ *
+ * @param snap the config snapshot to configure from.
+ **/
+void
+RankManager::configure(const vsm::VSMConfigSnapshot & snap)
+{
+    this->notify(snap);
+}
+
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h
new file mode 100644
index 00000000000..d4fdafaba8a
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/config-rank-profiles.h>
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/ranksetup.h>
+#include <vespa/searchlib/fef/tablemanager.h>
+#include <vespa/vsm/vsm/vsm-adapter.h>
+#include "indexenvironment.h"
+
+namespace storage {
+
+/**
+ * Keeps one rank setup per rank profile. The setups live in a Snapshot that is
+ * rebuilt from the rank-profiles config every time configure() is called.
+ **/
+class RankManager
+{
+public:
+    /** The field ids that make up one index. **/
+    using View = std::vector<uint32_t>;
+
+    /**
+     * One consistent view of the rank-profiles config together with the rank setup,
+     * index environment and property set derived for each rank profile.
+     * A fresh instance is built for every config reload.
+     **/
+    class Snapshot {
+    private:
+        using NamedPropertySet = std::pair<vespalib::string, search::fef::Properties>;
+        using ViewMap = vespalib::hash_map<vespalib::string, View>;
+        using Map = vespalib::hash_map<vespalib::string, int>;
+
+        search::fef::TableManager _tableManager;
+        IndexEnvironment _protoEnv;
+        std::vector<NamedPropertySet> _properties; // one property set per rank profile
+        std::vector<IndexEnvironment> _indexEnv; // one index environment per rank profile
+        std::vector<search::fef::RankSetup::SP> _rankSetup; // one rank setup per rank profile
+        Map _rpmap;
+        ViewMap _views;
+
+        void addProperties(const vespa::config::search::RankProfilesConfig & cfg);
+        void detectFields(const vsm::VsmfieldsHandle & fields);
+        void buildFieldMappings(const vsm::VsmfieldsHandle & fields);
+        bool initRankSetup(const search::fef::BlueprintFactory & factory);
+        bool setup(const RankManager & manager);
+
+        /** Looks up the slot registered for the given key; keys that are not registered map to slot 0. **/
+        int getIndex(const vespalib::string & key) const {
+            Map::const_iterator pos(_rpmap.find(key));
+            if (pos == _rpmap.end()) {
+                return 0;
+            }
+            return pos->second;
+        }
+
+    public:
+        using SP = std::shared_ptr<Snapshot>;
+
+        Snapshot();
+
+        const std::vector<NamedPropertySet> & getProperties() const { return _properties; }
+
+        bool setup(const RankManager & manager, const vespa::config::search::RankProfilesConfig & cfg);
+        bool setup(const RankManager & manager, const std::vector<NamedPropertySet> & properties);
+
+        /** Rank setup stored at the slot getIndex() yields for the given rank profile name. **/
+        const search::fef::RankSetup & getRankSetup(const vespalib::string &rankProfile) const {
+            return *(_rankSetup[getIndex(rankProfile)]);
+        }
+
+        /** Index environment stored at the slot getIndex() yields for the given rank profile name. **/
+        const IndexEnvironment & getIndexEnvironment(const vespalib::string &rankProfile) const {
+            return _indexEnv[getIndex(rankProfile)];
+        }
+
+        /** Returns the view registered for the given index name, or a null pointer if there is none. **/
+        const View *getView(const vespalib::string & index) const {
+            ViewMap::const_iterator pos = _views.find(index);
+            return (pos != _views.end()) ? &pos->second : nullptr;
+        }
+    };
+
+private:
+    search::fef::BlueprintFactory _blueprintFactory;
+    vespalib::PtrHolder<Snapshot> _snapshot;
+    const vsm::VSMAdapter * _vsmAdapter;
+
+    void configureRankProfiles(const vespa::config::search::RankProfilesConfig & cfg);
+    virtual void notify(const vsm::VSMConfigSnapshot & snapshot);
+
+public:
+    RankManager(vsm::VSMAdapter * const vsmAdapter);
+    virtual ~RankManager();
+
+    void configure(const vsm::VSMConfigSnapshot & snap);
+
+    /**
+     * Returns the snapshot currently held by this manager.
+     **/
+    Snapshot::SP getSnapshot() const { return _snapshot.get(); }
+};
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
new file mode 100644
index 00000000000..090479f9b90
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -0,0 +1,304 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchvisitor.rankprocessor");
+#include <vespa/searchlib/fef/handle.h>
+#include <vespa/searchlib/fef/simpletermfielddata.h>
+#include <vespa/vsm/vsm/fieldsearchspec.h>
+#include "querytermdata.h"
+#include "rankprocessor.h"
+
+using search::FeatureSet;
+using search::HitList;
+using search::Query;
+using search::QueryTerm;
+using search::QueryTermList;
+using search::fef::FeatureHandle;
+using search::fef::MatchData;
+using search::fef::Properties;
+using search::fef::RankProgram;
+using search::fef::RankSetup;
+using search::fef::IllegalHandle;
+using search::fef::ITermData;
+using search::fef::ITermFieldData;
+using search::fef::TermFieldHandle;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+using vdslib::SearchResult;
+
+namespace storage {
+
+namespace {
+
+// Builds the index name used in log messages: the plain index name when the
+// expanded name is identical, otherwise "index(expanded)".
+vespalib::string
+getIndexName(const vespalib::string & indexName, const vespalib::string & expandedIndexName)
+{
+    if (indexName != expandedIndexName) {
+        return indexName + "(" + expandedIndexName + ")";
+    }
+    return indexName;
+}
+
+// Returns the single seed feature handle of the given rank program.
+// Asserts that the program exposes exactly one seed name and one seed handle.
+FeatureHandle
+getFeatureHandle(const RankProgram &rankProgram) {
+    std::vector<vespalib::string> seedNames;
+    std::vector<FeatureHandle> seedHandles;
+    rankProgram.get_seed_handles(seedNames, seedHandles);
+    assert(seedNames.size() == 1);
+    assert(seedHandles.size() == 1);
+    return seedHandles.front();
+}
+
+}
+
+// Registers term data with the query environment for every query term.
+// A phrase contributes a single term data entry (taken from its first term);
+// the remaining phrase terms are only logged and skipped.
+void
+RankProcessor::initQueryEnvironment()
+{
+    QueryWrapper::TermList & terms = _query.getTermList();
+
+    for (uint32_t i = 0; i < terms.size(); ++i) {
+        auto && entry = terms[i];
+        if (entry.isPhraseTerm() && !entry.isFirstPhraseTerm()) {
+            LOG(debug, "Ignore query term '%s:%s' (part of phrase)",
+                entry.getTerm()->index().c_str(), entry.getTerm()->getTerm());
+            continue;
+        }
+
+        // register 1 term data per phrase
+        QueryTermData & qtd = dynamic_cast<QueryTermData &>(entry.getTerm()->getQueryItem());
+        auto & termData = qtd.getTermData();
+
+        termData.setWeight(entry.getTerm()->weight());
+        termData.setUniqueId(entry.getTerm()->uniqueId());
+        if (!entry.isFirstPhraseTerm()) {
+            termData.setPhraseLength(1);
+        } else {
+            termData.setPhraseLength(entry.getParent()->width());
+        }
+
+        // Allocate one term field handle for every field in the view of this term's index.
+        vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(entry.getTerm()->index());
+        const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName);
+        if (view == NULL) {
+            LOG(warning, "Could not find a view for index '%s'. Ranking no fields.",
+                getIndexName(entry.getTerm()->index(), expandedIndexName).c_str());
+        } else {
+            for (const uint32_t fieldId : *view) {
+                termData.addField(fieldId).setHandle(_mdLayout.allocTermField(fieldId));
+            }
+        }
+
+        LOG(debug, "Setup query term '%s:%s' (%s)",
+            getIndexName(entry.getTerm()->index(), expandedIndexName).c_str(),
+            entry.getTerm()->getTerm(),
+            entry.isFirstPhraseTerm() ? "phrase" : "term");
+        _queryEnv.addTerm(&termData);
+    }
+}
+
+// Replaces the current hit collector with a new one created for the wanted hit count.
+void
+RankProcessor::initHitCollector(size_t wantedHitCount)
+{
+    HitCollector * const collector = new HitCollector(wantedHitCount);
+    _hitCollector.reset(collector);
+}
+
+// Sets up the given rank program with this processor's match data layout and
+// query environment, passing an empty property set as the last argument.
+void
+RankProcessor::setupRankProgram(RankProgram &program)
+{
+    program.setup(_mdLayout, _queryEnv, Properties());
+}
+
+// Initializes the query environment, the rank program(s) and the hit collector.
+// For ranking: creates the first phase program (or the second phase program when
+// a second phase rank is configured) plus a summary program.
+// For dumping: creates only the dump program.
+void
+RankProcessor::init(bool forRanking, size_t wantedHitCount)
+{
+    initQueryEnvironment();
+    if (!forRanking) {
+        _rankProgram = _rankSetup.create_dump_program();
+        setupRankProgram(*_rankProgram);
+    } else {
+        if (!_rankSetup.getSecondPhaseRank().empty()) {
+            // We calculate 2. phase ranking for all hits (no need calculating 1. phase ranking as well)
+            _rankProgram = _rankSetup.create_second_phase_program();
+        } else {
+            _rankProgram = _rankSetup.create_first_phase_program();
+        }
+        setupRankProgram(*_rankProgram);
+        _rankScoreHandle = getFeatureHandle(*_rankProgram);
+        _summaryProgram = _rankSetup.create_summary_program();
+        setupRankProgram(*_summaryProgram);
+    }
+    initHitCollector(wantedHitCount);
+}
+
+// Binds the processor to a rank manager snapshot, a rank profile and a query.
+// The rank setup and index environment are looked up in the snapshot by rank
+// profile name; the rank programs and hit collector are created later by init().
+RankProcessor::RankProcessor(RankManager::Snapshot::SP snapshot,
+                             const vespalib::string &rankProfile,
+                             search::Query & query,
+                             const vespalib::string & location,
+                             Properties & queryProperties,
+                             const search::IAttributeManager * attrMgr)
+    : _rankManagerSnapshot(snapshot),
+      _rankSetup(snapshot->getRankSetup(rankProfile)),
+      _query(query),
+      _queryEnv(location, snapshot->getIndexEnvironment(rankProfile), queryProperties, attrMgr),
+      _mdLayout(),
+      _rankProgram(),
+      _score(0.0),
+      _summaryProgram(),
+      _rankScoreHandle(IllegalHandle),
+      _hitCollector()
+{
+}
+
+// Initializes this processor in ranking mode.
+void
+RankProcessor::initForRanking(size_t wantedHitCount)
+{
+    init(true, wantedHitCount);
+}
+
+// Initializes this processor in feature dumping mode.
+void
+RankProcessor::initForDumping(size_t wantedHitCount)
+{
+    init(false, wantedHitCount);
+}
+
+// Runs the rank program for the given document. When a rank score handle is
+// available the resulting score is stored in _score; NaN and infinite scores
+// are replaced by -HUGE_VAL.
+void
+RankProcessor::runRankProgram(uint32_t docId)
+{
+    _rankProgram->run(docId);
+    if (_rankScoreHandle == IllegalHandle) {
+        return;
+    }
+    MatchData &md = _rankProgram->match_data();
+    _score = *(md.resolveFeature(_rankScoreHandle));
+    const bool unusable = isnan(_score) || isinf(_score);
+    if (unusable) {
+        _score = -HUGE_VAL;
+    }
+}
+
+namespace {
+
+// Copies every term field match data entry from 'src' into the slot with the
+// same handle in 'dst'. Asserts that both hold the same number of term fields.
+void
+copyTermFieldMatchData(const std::vector<search::fef::TermFieldMatchData> &src, MatchData &dst)
+{
+    assert(src.size() == dst.getNumTermFields());
+    search::fef::TermFieldHandle slot = 0;
+    while (slot < dst.getNumTermFields()) {
+        (*dst.resolveTermField(slot)) = src[slot];
+        ++slot;
+    }
+}
+
+// Adapter that lets the hit collector run a RankProgram on match data that was
+// unpacked earlier and handed back in by the collector.
+class RankProgramWrapper : public HitCollector::IRankProgram
+{
+private:
+    RankProgram &_program;
+
+public:
+    RankProgramWrapper(RankProgram &rankProgram) : _program(rankProgram) {}
+
+    virtual const MatchData &run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) override {
+        // Load the earlier unpacked match data into the program's own match data before running it.
+        MatchData &programData = _program.match_data();
+        copyTermFieldMatchData(matchData, programData);
+        _program.run(docid);
+        return _program.match_data();
+    }
+};
+
+}
+
+// Calculates the feature set for the hits held by the hit collector.
+// The summary program is used when one has been created (ranking mode);
+// otherwise the rank program is used (dumping mode).
+FeatureSet::SP
+RankProcessor::calculateFeatureSet()
+{
+    LOG(debug, "Calculate feature set");
+    std::vector<vespalib::string> names;
+    std::vector<FeatureHandle> handles;
+    RankProgram &rankProgram = *(_summaryProgram.get() != nullptr ? _summaryProgram : _rankProgram);
+    rankProgram.get_seed_handles(names, handles);
+    // std::vector::size() returns size_t, which must be printed with %zu;
+    // %ld is only correct where long and size_t happen to have the same width.
+    LOG(debug, "Feature handles: numNames(%zu), numHandles(%zu)", names.size(), handles.size());
+    RankProgramWrapper wrapper(rankProgram);
+    FeatureSet::SP sf = _hitCollector->getFeatureSet(wrapper, names, handles);
+    LOG(debug, "Feature set: numFeatures(%u), numDocs(%u)", sf->numFeatures(), sf->numDocs());
+    return sf;
+}
+
+// Lets the hit collector fill the given search result with its collected hits.
+void
+RankProcessor::fillSearchResult(vdslib::SearchResult & searchResult)
+{
+    HitCollector & collector = *_hitCollector;
+    collector.fillSearchResult(searchResult);
+}
+
+// Tags the rank program's match data with the given document id and unpacks
+// the query term hits into it.
+void
+RankProcessor::unpackMatchData(uint32_t docId)
+{
+    MatchData &target = _rankProgram->match_data();
+    target.setDocId(docId);
+    unpackMatchData(target);
+}
+
+// Unpacks the hits of every query term into the given match data.
+// A phrase is handled once, through its first term, using the hit list
+// evaluated on the phrase's parent node. For each hit a position is appended
+// to the term field match data of the field the hit occurred in.
+void
+RankProcessor::unpackMatchData(MatchData &matchData)
+{
+ QueryWrapper::TermList & terms = _query.getTermList();
+ for (uint32_t i = 0; i < terms.size(); ++i) {
+ if (!terms[i].isPhraseTerm() || terms[i].isFirstPhraseTerm()) { // consider 1 term data per phrase
+ bool isPhrase = terms[i].isFirstPhraseTerm();
+ // NOTE(review): unchecked downcast; initQueryEnvironment() uses dynamic_cast for the same conversion.
+ QueryTermData & qtd = static_cast<QueryTermData &>(terms[i].getTerm()->getQueryItem());
+ const ITermData &td = qtd.getTermData();
+
+ // 'list' is passed to evaluateHits(); the hits are read through the returned reference.
+ HitList list;
+ const HitList & hitList = isPhrase ?
+ terms[i].getParent()->evaluateHits(list) : terms[i].getTerm()->evaluateHits(list);
+
+ if (hitList.size() > 0) { // only unpack if we have a hit
+ LOG(debug, "Unpack match data for query term '%s:%s' (%s)",
+ terms[i].getTerm()->index().c_str(), terms[i].getTerm()->getTerm(),
+ isPhrase ? "phrase" : "term");
+
+ // -1 converts to the largest uint32_t and serves as "no field seen yet".
+ uint32_t lastFieldId = -1;
+ TermFieldMatchData *tmd = 0;
+ uint32_t fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
+
+ // optimize for hitlist giving all hits for a single field in one chunk
+ for (const search::Hit & hit : hitList) {
+ uint32_t fieldId = hit.context();
+
+ // The per-field state (tmd, fieldLen) is only recomputed when the field id changes.
+ if (fieldId != lastFieldId) {
+ // reset to notfound/unknown values
+ tmd = 0;
+ fieldLen = search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
+
+ // setup for new field that had a hit
+ const ITermFieldData *tfd = td.lookupField(fieldId);
+ if (tfd != 0) {
+ tmd = matchData.resolveTermField(tfd->getHandle());
+ tmd->setFieldId(fieldId);
+ // reset field match data, but only once per docId
+ if (tmd->getDocId() != matchData.getDocId()) {
+ tmd->reset(matchData.getDocId());
+ }
+ }
+ // find fieldLen for new field
+ // (stays UNKNOWN_LENGTH when the field id is outside the available field info)
+ if (isPhrase) {
+ if (fieldId < terms[i].getParent()->getFieldInfoSize()) {
+ const QueryTerm::FieldInfo & fi = terms[i].getParent()->getFieldInfo(fieldId);
+ fieldLen = fi.getFieldLength();
+ }
+ } else {
+ if (fieldId < terms[i].getTerm()->getFieldInfoSize()) {
+ const QueryTerm::FieldInfo & fi = terms[i].getTerm()->getFieldInfo(fieldId);
+ fieldLen = fi.getFieldLength();
+ }
+ }
+ lastFieldId = fieldId;
+ }
+ // Hits in fields the term data has no entry for (tmd == 0) are dropped.
+ if (tmd != 0) {
+ // adjust so that the position for phrase terms equals the match for the first term
+ TermFieldMatchDataPosition pos(0, hit.wordpos() - terms[i].getPosAdjust(),
+ hit.weight(), fieldLen);
+ tmd->appendPosition(pos);
+ LOG(debug, "Append position(%u), weight(%d), tfmd.weight(%d)",
+ pos.getPosition(), pos.getElementWeight(), tmd->getWeight());
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
new file mode 100644
index 00000000000..0596d0803f3
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/rank_program.h>
+#include <vespa/searchlib/fef/ranksetup.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/vdslib/container/searchresult.h>
+#include "hitcollector.h"
+#include "queryenvironment.h"
+#include "querywrapper.h"
+#include "rankmanager.h"
+
+namespace storage {
+
+/**
+ * Bound to one query and one rank profile. Computes the rank score and the
+ * feature set for matched documents.
+ **/
+class RankProcessor
+{
+private:
+    RankManager::Snapshot::SP _rankManagerSnapshot;
+    const search::fef::RankSetup & _rankSetup;
+    QueryWrapper _query;
+
+    QueryEnvironment _queryEnv;
+    search::fef::MatchDataLayout _mdLayout;
+    search::fef::RankProgram::UP _rankProgram;
+    double _score;
+    search::fef::RankProgram::UP _summaryProgram;
+    search::fef::FeatureHandle _rankScoreHandle;
+    HitCollector::UP _hitCollector;
+
+    void initQueryEnvironment();
+    void initHitCollector(size_t wantedHitCount);
+    void setupRankProgram(search::fef::RankProgram &program);
+
+    /**
+     * Common initialization behind initForRanking() and initForDumping().
+     * @param forRanking whether this should be used for ranking or dumping.
+     * @param wantedHitCount the number of hits we want to return from the hit collector.
+     **/
+    void init(bool forRanking, size_t wantedHitCount);
+
+    void unpackMatchData(search::fef::MatchData &matchData);
+
+public:
+    using UP = std::unique_ptr<RankProcessor>;
+
+    RankProcessor(RankManager::Snapshot::SP snapshot,
+                  const vespalib::string &rankProfile,
+                  search::Query & query,
+                  const vespalib::string & location,
+                  search::fef::Properties & queryProperties,
+                  const search::IAttributeManager * attrMgr);
+
+    void initForRanking(size_t wantedHitCount);
+    void initForDumping(size_t wantedHitCount);
+    void unpackMatchData(uint32_t docId);
+    void runRankProgram(uint32_t docId);
+    search::FeatureSet::SP calculateFeatureSet();
+    void fillSearchResult(vdslib::SearchResult & searchResult);
+
+    /** Match data owned by the rank program; requires that one of the init functions has been called. **/
+    const search::fef::MatchData &getMatchData() const { return _rankProgram->match_data(); }
+
+    void setRankScore(double score) { _score = score; }
+    double getRankScore() const { return _score; }
+
+    /** Hit collector created by the init functions. **/
+    HitCollector & getHitCollector() { return *_hitCollector; }
+};
+
+} // namespace storage
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp
new file mode 100644
index 00000000000..df40062ce07
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "searchenvironment.h"
+
+LOG_SETUP(".visitor.instance.searchenvironment");
+
+using search::docsummary::JuniperProperties;
+using vsm::VSMAdapter;
+
+namespace storage {
+
+// Per-thread pointer to the calling thread's private environment map; starts out null.
+__thread SearchEnvironment::EnvMap * SearchEnvironment::_localEnvMap = nullptr;
+
+// Creates the VSM adapter and rank manager for one config id and then starts
+// the configurer. start() is called in the body, i.e. after all members have
+// been constructed.
+SearchEnvironment::Env::Env(const vespalib::string & muffens, const config::ConfigUri & configUri, Fast_NormalizeWordFolder & wf)
+    : _configId(configUri.getConfigId()),
+      _configurer(config::SimpleConfigRetriever::UP(
+                      new config::SimpleConfigRetriever(createKeySet(configUri.getConfigId()), configUri.getContext())),
+                  this),
+      _vsmAdapter(new VSMAdapter(muffens, _configId, wf)),
+      _rankManager(new RankManager(_vsmAdapter.get()))
+{
+    _configurer.start();
+}
+
+// Builds the key set naming every config type this environment needs,
+// all registered under the given config id.
+config::ConfigKeySet
+SearchEnvironment::Env::createKeySet(const vespalib::string & configId)
+{
+    config::ConfigKeySet keys;
+    keys.add<vespa::config::search::vsm::VsmfieldsConfig,
+             vespa::config::search::SummaryConfig,
+             vespa::config::search::SummarymapConfig,
+             vespa::config::search::vsm::VsmsummaryConfig,
+             vespa::config::search::summary::JuniperrcConfig,
+             vespa::config::search::RankProfilesConfig>(configId);
+    return keys;
+}
+
+// Wraps the incoming config snapshot for this config id and hands it first to
+// the VSM adapter and then to the rank manager.
+void
+SearchEnvironment::Env::configure(const config::ConfigSnapshot & snapshot)
+{
+    vsm::VSMConfigSnapshot vsmSnapshot(_configId, snapshot);
+    _vsmAdapter->configure(vsmSnapshot);
+    _rankManager->configure(vsmSnapshot);
+}
+
+// Closes the configurer before the members are destroyed.
+SearchEnvironment::Env::~Env()
+{
+    _configurer.close();
+}
+
+// Remembers the base config uri; environments are created lazily by getEnv().
+SearchEnvironment::SearchEnvironment(const config::ConfigUri & configUri)
+    : VisitorEnvironment(),
+      _envMap(),
+      _configUri(configUri)
+{
+}
+
+// Drops the registered per-thread maps while holding the lock.
+SearchEnvironment::~SearchEnvironment()
+{
+    vespalib::LockGuard lockHeld(_lock);
+    _threadLocals.clear();
+}
+
+// Returns the environment for the given search cluster, creating it on first use.
+// Lookups go through a per-thread map first so that the common case takes no lock;
+// only a miss in the per-thread map takes the lock and consults the shared map.
+// NOTE(review): _localEnvMap is a static thread-local shared by all SearchEnvironment
+// instances, while the maps it points to are registered in this instance's
+// _threadLocals - presumably only one instance is expected per process; confirm.
+SearchEnvironment::Env & SearchEnvironment::getEnv(const vespalib::string & searchCluster)
+{
+    if (_localEnvMap == NULL) {
+        _localEnvMap = new EnvMap;
+        vespalib::LockGuard guard(_lock);
+        _threadLocals.push_back(EnvMapLP(_localEnvMap));
+    }
+    EnvMap::iterator localFound = _localEnvMap->find(searchCluster);
+    if (localFound == _localEnvMap->end()) {
+        vespalib::LockGuard guard(_lock);
+        EnvMap::iterator found = _envMap.find(searchCluster);
+        if (found == _envMap.end()) {
+            LOG(debug, "Init VSMAdapter with config id = '%s'", searchCluster.c_str());
+            // The uri is only needed when a new environment is created, so build it here
+            // instead of on every call.
+            config::ConfigUri searchClusterUri(_configUri.createWithNewId(searchCluster));
+            // Construct the environment before touching the map: if the constructor throws,
+            // no empty entry is left behind for a later call to dereference.
+            Env::SP created(new Env("*", searchClusterUri, _wordFolder));
+            _envMap[searchCluster] = created;
+            found = _envMap.find(searchCluster);
+        }
+        _localEnvMap->insert(*found);
+        localFound = _localEnvMap->find(searchCluster);
+    }
+    return *localFound->second;
+}
+
+}
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h
new file mode 100644
index 00000000000..c67153b8dd2
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h
@@ -0,0 +1,53 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchsummary/docsummary/juniperproperties.h>
+#include <vespa/storage/visiting/visitor.h>
+#include <vespa/config/retriever/simpleconfigurer.h>
+#include <vespa/vsm/vsm/vsm-adapter.h>
+#include "rankmanager.h"
+
+namespace storage {
+
+/**
+ * Visitor environment that hands out a VSM adapter and a rank manager per
+ * search cluster. The per-cluster environments are created on first use.
+ **/
+class SearchEnvironment : public VisitorEnvironment
+{
+private:
+    /** Everything that is configured for one search cluster (one config id). **/
+    class Env : public config::SimpleConfigurable {
+    public:
+        using SP = std::shared_ptr<Env>;
+
+        Env(const vespalib::string & muffens, const config::ConfigUri & configUri, Fast_NormalizeWordFolder & wf);
+        ~Env();
+
+        const vsm::VSMAdapter * getVSMAdapter() const { return _vsmAdapter.get(); }
+        const RankManager * getRankManager() const { return _rankManager.get(); }
+        void configure(const config::ConfigSnapshot & snapshot);
+
+        static config::ConfigKeySet createKeySet(const vespalib::string & configId);
+
+    private:
+        const vespalib::string _configId;
+        config::SimpleConfigurer _configurer;
+        std::unique_ptr<vsm::VSMAdapter> _vsmAdapter;
+        std::unique_ptr<RankManager> _rankManager;
+    };
+
+    using EnvMap = vespalib::hash_map<vespalib::string, Env::SP>;
+    using EnvMapLP = vespalib::LinkedPtr<EnvMap>;
+    using ThreadLocals = std::vector<EnvMapLP>;
+
+    static __thread EnvMap * _localEnvMap; // the calling thread's private map
+    EnvMap _envMap; // shared map, accessed while holding _lock
+    ThreadLocals _threadLocals; // per-thread maps registered by getEnv()
+    vespalib::Lock _lock;
+    Fast_NormalizeWordFolder _wordFolder;
+    config::ConfigUri _configUri;
+
+    Env & getEnv(const vespalib::string & searchcluster);
+
+public:
+    SearchEnvironment(const config::ConfigUri & configUri);
+    ~SearchEnvironment();
+
+    /** VSM adapter of the given search cluster; the cluster's environment is created if needed. **/
+    const vsm::VSMAdapter * getVSMAdapter(const vespalib::string & searchcluster) { return getEnv(searchcluster).getVSMAdapter(); }
+
+    /** Rank manager of the given search cluster; the cluster's environment is created if needed. **/
+    const RankManager * getRankManager(const vespalib::string & searchcluster) { return getEnv(searchcluster).getRankManager(); }
+};
+
+}
+
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
new file mode 100644
index 00000000000..c15062feb8a
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -0,0 +1,1166 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/document/datatype/positiondatatype.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/bytefieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/doublefieldvalue.h>
+#include <vespa/document/fieldvalue/floatfieldvalue.h>
+#include <vespa/document/fieldvalue/longfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/aggregation/modifiers.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/fastlib/text/wordfolder.h>
+#include <vespa/vdslib/container/documentlist.h>
+#include <vespa/vsm/config/vsm-cfif.h>
+#include <vespa/vsm/vsm/docsumfilter.h>
+#include <vespa/vsm/vsm/vsm-adapter.h>
+#include "querytermdata.h"
+#include "searchenvironment.h"
+#include "searchvisitor.h"
+
+LOG_SETUP(".visitor.instance.searchvisitor");
+
+namespace storage {
+
+using vsm::VSMAdapter;
+using vsm::DocsumFilter;
+using vsm::DocsumTools;
+using vsm::DocsumToolsPtr;
+using vsm::DocSumCache;
+using vsm::FieldIdTSearcherMap;
+using vsm::FieldPathMapT;
+using vsm::FieldSearcher;
+using vsm::FieldSearchSpecMap;
+using vsm::VsmfieldsHandle;
+using vsm::FieldPath;
+using vsm::StorageDocument;
+using vsm::StringFieldIdTMap;
+using search::IAttributeManager;
+using search::AttributeGuard;
+using search::AttributeManager;
+using search::AttributeVector;
+using search::attribute::IAttributeVector;
+using search::EmptyQueryNodeResult;
+using search::Query;
+using search::QueryPacketT;
+using search::FeatureSet;
+using search::fs4transport::FS4Packet_DOCSUM;
+using search::fs4transport::FS4Packet_EOL;
+using search::fs4transport::PacketArray;
+using namespace search::docsummary;
+using namespace search::aggregation;
+using namespace search::expression;
+using vdslib::Parameters;
+using vdslib::DocumentList;
+
+
+// Helper whose constructor configures the normalizing word folder. A single
+// file-level instance is defined below so the setup runs during static
+// initialization of this translation unit.
+class ForceWordfolderInit
+{
+public:
+    ForceWordfolderInit();
+};
+
+ForceWordfolderInit::ForceWordfolderInit()
+{
+    Fast_NormalizeWordFolder::Setup(
+        Fast_NormalizeWordFolder::DO_ACCENT_REMOVAL
+        | Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION
+        | Fast_NormalizeWordFolder::DO_LIGATURE_SUBSTITUTION
+        | Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION);
+}
+
+static ForceWordfolderInit _G_forceNormWordFolderInit;
+
+
+// Creates an array or weighted set attribute matching the (nested) data type
+// of the given field value. Integer-like types (byte, int, long) map to an
+// integer attribute, double/float to a float attribute and string to a string
+// attribute. Returns an empty pointer for any other data type.
+AttributeVector::SP
+createMultiValueAttribute(const vespalib::string & name, const document::FieldValue & fv, bool arrayType)
+{
+    const document::DataType * ndt = fv.getDataType();
+    if (ndt->inherits(document::CollectionDataType::classId)) {
+        // For collections the element type decides which attribute to create.
+        const document::CollectionDataType * collection = static_cast<const document::CollectionDataType *>(ndt);
+        ndt = &collection->getNestedType();
+    }
+    LOG(debug, "Create %s attribute '%s' with data type '%s' (%s)",
+        arrayType ? "array" : "weighted set", name.c_str(), ndt->getName().c_str(), fv.getClass().name());
+
+    AttributeVector::SP attr;
+    AttributeVector * created = nullptr;
+    if (ndt->getId() == document::DataType::T_BYTE ||
+        ndt->getId() == document::DataType::T_INT ||
+        ndt->getId() == document::DataType::T_LONG)
+    {
+        if (arrayType) {
+            created = new search::MultiIntegerExtAttribute(name);
+        } else {
+            created = new search::WeightedSetIntegerExtAttribute(name);
+        }
+        attr.reset(created);
+    } else if (ndt->getId() == document::DataType::T_DOUBLE ||
+               ndt->getId() == document::DataType::T_FLOAT)
+    {
+        if (arrayType) {
+            created = new search::MultiFloatExtAttribute(name);
+        } else {
+            created = new search::WeightedSetFloatExtAttribute(name);
+        }
+        attr.reset(created);
+    } else if (ndt->getId() == document::DataType::T_STRING) {
+        if (arrayType) {
+            created = new search::MultiStringExtAttribute(name);
+        } else {
+            created = new search::WeightedSetStringExtAttribute(name);
+        }
+        attr.reset(created);
+    } else {
+        LOG(debug, "Can not make an multivalue attribute out of %s with data type '%s' (%s)",
+            name.c_str(), ndt->getName().c_str(), fv.getClass().name());
+    }
+    return attr;
+}
+
+// Creates a single value attribute matching the class of the given field value:
+// byte/int/long values give an integer attribute, double/float a float attribute
+// and string a string attribute. Returns an empty pointer for other value types.
+AttributeVector::SP
+createAttribute(const vespalib::string & name, const document::FieldValue & fv)
+{
+    LOG(debug, "Create single value attribute '%s' with value type '%s'", name.c_str(), fv.getClass().name());
+    AttributeVector::SP attr;
+
+    if (fv.inherits(document::ByteFieldValue::classId)
+        || fv.inherits(document::IntFieldValue::classId)
+        || fv.inherits(document::LongFieldValue::classId))
+    {
+        attr.reset(new search::SingleIntegerExtAttribute(name));
+    } else if (fv.inherits(document::DoubleFieldValue::classId)
+               || fv.inherits(document::FloatFieldValue::classId))
+    {
+        attr.reset(new search::SingleFloatExtAttribute(name));
+    } else if (fv.inherits(document::StringFieldValue::classId)) {
+        attr.reset(new search::SingleStringExtAttribute(name));
+    } else {
+        LOG(debug, "Can not make an attribute out of %s of type '%s'.", name.c_str(), fv.getClass().name());
+    }
+    return attr;
+}
+
+// Starts out without a docsum writer; fillSummary() returns empty buffers until one is set.
+SearchVisitor::SummaryGenerator::SummaryGenerator()
+    : search::aggregation::HitsAggregationResult::SummaryGenerator(),
+      _callback(),
+      _docsumState(_callback),
+      _docsumFilter(),
+      _docsumWriter(nullptr),
+      _rawBuf(4096)
+{
+}
+
+// Writes the document summary for the given local document id, using the given
+// summary class, into the internal raw buffer and returns a reference to it.
+// Returns an empty buffer reference when no docsum writer has been set.
+vespalib::ConstBufferRef SearchVisitor::SummaryGenerator::fillSummary(search::AttributeVector::DocId lid, const search::aggregation::HitsAggregationResult::SummaryClassType & summaryClass)
+{
+    if (_docsumWriter == NULL) {
+        return vespalib::ConstBufferRef();
+    }
+    _rawBuf.reset();
+    _docsumState._args.setResultClassName(summaryClass);
+    uint32_t writtenLen = _docsumWriter->WriteDocsum(lid, &_docsumState, _docsumFilter.get(), &_rawBuf);
+    return vespalib::ConstBufferRef(_rawBuf.GetDrainPos(), writtenLen);
+}
+
+// Attaches the summary generator to the given hits aggregation result and
+// counts it. The object is cast without a check; check() is the matching type test.
+void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj)
+{
+    auto & hitsResult = static_cast<search::aggregation::HitsAggregationResult &>(obj);
+    hitsResult.setSummaryGenerator(_summaryGenerator);
+    ++_numHitsAggregators;
+}
+
+// True when the given object is a hits aggregation result (or derived from it).
+bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const
+{
+    const bool isHitsResult = obj.getClass().inherits(search::aggregation::HitsAggregationResult::classId);
+    return isHitsResult;
+}
+
+// Wraps a grouping request. The aggregation limit is taken from the grouping's
+// getMaxN(), called with the largest size_t value as its argument.
+SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping)
+    : _grouping(grouping),
+      _count(0),
+      _limit(grouping->getMaxN(std::numeric_limits<size_t>::max()))
+{
+}
+
+// Feeds the document to the grouping unless the limit has already been reached.
+void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank)
+{
+    if (_count >= _limit) {
+        return;
+    }
+    _grouping->aggregate(doc, rank);
+    ++_count;
+}
+
+// Runs the completion step from the destructor if it has not been run yet.
+SearchVisitor::~SearchVisitor() {
+    if (isCompletedCalled()) {
+        return;
+    }
+    document::OrderingSpecification orderSpec;
+    HitCounter hc(&orderSpec);
+    completedVisitingInternal(hc);
+}
+
+// Creates a visitor with default state. The visitor environment is used as a
+// SearchEnvironment (unchecked cast). The synthetic "[docid]" and "[rank]"
+// attributes are created here; the VSM adapter and docsum writer start out unset.
+SearchVisitor::SearchVisitor(StorageComponent& component,
+                             VisitorEnvironment& vEnv,
+                             const Parameters& params)
+    : Visitor(component),
+      _env(static_cast<SearchEnvironment &>(vEnv)),
+      _params(params),
+      _vsmAdapter(nullptr),
+      _docSearchedCount(0),
+      _hitCount(0),
+      _hitsRejectedCount(0),
+      _query(),
+      _queryResult(new documentapi::QueryResultMessage()),
+      _fieldSearcherMap(),
+      _docTypeMapping(),
+      _fieldSearchSpecMap(),
+      _snippetModifierManager(),
+      _summaryGenerator(),
+      _summaryClass("default"),
+      _attrMan(),
+      _attrCtx(_attrMan.createContext()),
+      _groupingList(),
+      _attributeFields(),
+      _sortList(),
+      _docsumWriter(nullptr),
+      _searchBuffer(new vsm::SearcherBuf()),
+      _tmpSortBuffer(256),
+      _documentIdAttributeBacking(new search::SingleStringExtAttribute("[docid]") ),
+      _rankAttributeBacking(new search::SingleFloatExtAttribute("[rank]") ),
+      _documentIdAttribute(dynamic_cast<search::SingleStringExtAttribute &>(*_documentIdAttributeBacking)),
+      _rankAttribute(dynamic_cast<search::SingleFloatExtAttribute &>(*_rankAttributeBacking)),
+      _shouldFillRankAttribute(false),
+      _syntheticFieldsController(),
+      _rankController()
+{
+    LOG(debug, "Created SearchVisitor");
+}
+
+void SearchVisitor::init(const Parameters & params)
+{
+ _attrMan.add(_documentIdAttributeBacking);
+ _attrMan.add(_rankAttributeBacking);
+ Parameters::ValueRef valueRef;
+ if ( params.get("summaryclass", valueRef) ) {
+ _summaryClass = vespalib::string(static_cast<const char *>(valueRef.data()),
+ static_cast<unsigned>(valueRef.size()));
+ LOG(debug, "Received summary class: %s", _summaryClass.c_str());
+ }
+
+ size_t wantedSummaryCount(10);
+ if (params.get("summarycount", valueRef) ) {
+ vespalib::string tmp(static_cast<const char *>(valueRef.data()), valueRef.size());
+ wantedSummaryCount = strtoul(tmp.c_str(), NULL, 0);
+ LOG(debug, "Received summary count: %ld", wantedSummaryCount);
+ }
+ _queryResult->getSearchResult().setWantedHitCount(wantedSummaryCount);
+
+ if (params.get("rankprofile", valueRef) ) {
+ vespalib::string tmp(static_cast<const char *>(valueRef.data()), valueRef.size());
+ _rankController.setRankProfile(tmp);
+ LOG(debug, "Received rank profile: %s", _rankController.getRankProfile().c_str());
+ }
+
+ if (params.get("queryflags", valueRef) ) {
+ vespalib::string tmp(static_cast<const char *>(valueRef.data()), valueRef.size());
+ LOG(debug, "Received query flags: 0x%lx", strtoul(tmp.c_str(), NULL, 0));
+ uint32_t queryFlags = strtoul(tmp.c_str(), NULL, 0);
+ _rankController.setDumpFeatures((queryFlags & search::fs4transport::QFLAG_DUMP_FEATURES) != 0);
+ LOG(debug, "QFLAG_DUMP_FEATURES: %s", _rankController.getDumpFeatures() ? "true" : "false");
+ }
+
+ if (params.get("rankproperties", valueRef) && valueRef.size() > 0) {
+ LOG(spam, "Received rank properties of %zd bytes", valueRef.size());
+ uint32_t len = static_cast<uint32_t>(valueRef.size());
+ char * data = const_cast<char *>(static_cast<const char *>(valueRef.data()));
+ FNET_DataBuffer src(data, len);
+ uint32_t cnt = src.ReadInt32();
+ len -= sizeof(uint32_t);
+ LOG(debug, "Properties count: '%u'", cnt);
+ for (uint32_t i = 0; i < cnt; ++i) {
+ search::fs4transport::FS4Properties prop;
+ if (!prop.decode(src, len)) {
+ LOG(warning, "Could not decode rank properties");
+ } else {
+ LOG(debug, "Properties[%u]: name '%s', size '%u'", i, prop.getName(), prop.size());
+ if (strcmp(prop.getName(), "rank") == 0) { // pick up rank properties
+ for (uint32_t j = 0; j < prop.size(); ++j) {
+ LOG(debug, "Properties[%u][%u]: key '%s' -> value '%s'", i, j, prop.getKey(j), prop.getValue(j));
+ _rankController.getQueryProperties().add(vespalib::string(prop.getKey(j), prop.getKeyLen(j)),
+ vespalib::string(prop.getValue(j), prop.getValueLen(j)));
+ }
+ }
+ }
+ }
+ } else {
+ LOG(debug, "No rank properties received");
+ }
+
+ if (params.get("rankprofile", valueRef)) {
+ vespalib::string tmp(static_cast<const char *>(valueRef.data()), valueRef.size());
+ _summaryGenerator.getDocsumState()._args.SetRankProfile(tmp);
+ }
+
+ int queryFlags = 0;
+ if (params.get("queryflags", queryFlags)) {
+ _summaryGenerator.getDocsumState()._args.SetQueryFlags(queryFlags);
+ }
+
+ vespalib::string location;
+ if (params.get("location", valueRef)) {
+ location = vespalib::string(static_cast<const char *>(valueRef.data()), valueRef.size());
+ LOG(debug, "Location = '%s'", location.c_str());
+ _summaryGenerator.getDocsumState()._args.SetLocation(valueRef.size(), (const char*)valueRef.data());
+ }
+
+ Parameters::ValueRef searchClusterBlob;
+ if (params.get("searchcluster", searchClusterBlob)) {
+ LOG(spam, "Received searchcluster blob of %zd bytes", searchClusterBlob.size());
+ vespalib::string searchCluster(static_cast<const char *>(searchClusterBlob.data()), searchClusterBlob.size());
+ _vsmAdapter = _env.getVSMAdapter(searchCluster);
+
+ if ( params.get("sort", valueRef) ) {
+ _sortSpec = search::common::SortSpec(vespalib::string(static_cast<const char *>(valueRef.data()),
+ static_cast<unsigned>(valueRef.size())));
+ LOG(debug, "Received sort specification: '%s'", _sortSpec.getSpec().c_str());
+ }
+
+ Parameters::ValueRef queryBlob;
+ if ( params.get("query", queryBlob) ) {
+ LOG(spam, "Received query blob of %zd bytes", queryBlob.size());
+ QueryTermData resultAddOn;
+ _query = Query(resultAddOn, QueryPacketT(static_cast<const char *>(queryBlob.data()), queryBlob.size()));
+ LOG(debug, "Query tree: '%s'", _query.asString().c_str());
+ _searchBuffer->reserve(0x10000);
+
+ int stackCount = 0;
+ if (params.get("querystackcount", stackCount)) {
+ _summaryGenerator.getDocsumState()._args.SetStackDump(stackCount, queryBlob.size(), (const char*)queryBlob.data());
+ } else {
+ LOG(warning, "Request without query stack count");
+ }
+
+ std::vector<vespalib::string> additionalFields;
+ registerAdditionalFields(_vsmAdapter->getDocsumTools()->getFieldSpecs(), additionalFields);
+
+ StringFieldIdTMap fieldsInQuery;
+ setupFieldSearchers(additionalFields, fieldsInQuery);
+
+ setupSnippetModifiers();
+
+ setupScratchDocument(fieldsInQuery);
+
+ _syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery);
+
+ setupAttributeVectors();
+
+ setupAttributeVectorsForSorting(_sortSpec);
+
+ const RankManager * rm = _env.getRankManager(searchCluster);
+ _rankController.setRankManagerSnapshot(rm->getSnapshot());
+ _rankController.setupRankProcessors(_query, location, wantedSummaryCount, _attrMan, _attributeFields);
+ // Depends on hitCollector setup.
+ setupDocsumObjects();
+
+ } else {
+ LOG(warning, "No query received");
+ }
+
+ if (params.get("aggregation", valueRef) ) {
+ std::vector<char> newAggrBlob;
+ newAggrBlob.resize(valueRef.size());
+ memcpy(&newAggrBlob[0], valueRef.data(), newAggrBlob.size());
+ LOG(debug, "Received new aggregation blob of %zd bytes", newAggrBlob.size());
+ setupGrouping(newAggrBlob);
+ }
+
+ } else {
+ LOG(warning, "No searchcluster specified");
+ }
+
+ if ( params.get("unique", valueRef) ) {
+ LOG(spam, "Received unique specification of %zd bytes", valueRef.size());
+ } else {
+ LOG(debug, "No unique specification received");
+ }
+}
+
+ // Constructs the factory and stores the config URI; the URI is later used
+ // by makeVisitorEnvironment() when building the SearchEnvironment.
+ SearchVisitorFactory::SearchVisitorFactory(const config::ConfigUri & configUri)
+     : VisitorFactory(), _configUri(configUri)
+ {
+ }
+
+ // Creates the visitor environment: a SearchEnvironment constructed from the
+ // config URI stored in this factory. The StorageComponent argument is unused.
+ VisitorEnvironment::UP
+ SearchVisitorFactory::makeVisitorEnvironment(StorageComponent&)
+ {
+     VisitorEnvironment::UP environment(new SearchEnvironment(_configUri));
+     return environment;
+ }
+
+ // Allocates a new SearchVisitor with the given component, environment and
+ // parameters and returns it as a raw Visitor pointer.
+ // NOTE(review): the caller presumably takes ownership of the returned object - confirm.
+ storage::Visitor*
+ SearchVisitorFactory::makeVisitor(StorageComponent& component,
+                                   storage::VisitorEnvironment& env,
+                                   const vdslib::Parameters& params)
+ {
+     storage::Visitor * visitor = new SearchVisitor(component, env, params);
+     return visitor;
+ }
+
+ // Adds a primitive field value (with its weight) to the wrapped attribute.
+ // The conversion used (long, double or string) is selected from the runtime
+ // class of the attribute vector; any other attribute class trips an assert.
+ void
+ SearchVisitor::AttributeInserter::onPrimitive(const IteratorContent & c)
+ {
+     const document::FieldValue & fieldValue = c.getValue();
+     LOG(debug, "AttributeInserter: Adding value '%s'(%d) to attribute '%s' for docid '%d'",
+         fieldValue.toString().c_str(), c.getWeight(), _attribute.getName().c_str(), _docId);
+     search::IExtendAttribute & extender = *_attribute.getExtendInterface();
+     const vespalib::Identifiable::RuntimeClass & attrClass = _attribute.getClass();
+
+     if (attrClass.inherits(search::IntegerAttribute::classId)) {
+         extender.add(fieldValue.getAsLong(), c.getWeight());
+         return;
+     }
+     if (attrClass.inherits(search::FloatingPointAttribute::classId)) {
+         extender.add(fieldValue.getAsDouble(), c.getWeight());
+         return;
+     }
+     if (attrClass.inherits(search::StringAttribute::classId)) {
+         extender.add(fieldValue.getAsString().c_str(), c.getWeight());
+         return;
+     }
+     assert(false && "We got an attribute vector that is of an unknown type");
+ }
+
+ // Binds the inserter to the attribute vector that receives values and
+ // records the document id (used in log output by the handlers).
+ SearchVisitor::AttributeInserter::AttributeInserter(search::AttributeVector & attribute,
+                                                     search::AttributeVector::DocId docId)
+     : _attribute(attribute), _docId(docId)
+ {
+ }
+
+ // Sets up the base inserter and looks up the X and Y fields of the position
+ // data type once, so they can be reused for every struct that is visited.
+ SearchVisitor::PositionInserter::PositionInserter(search::AttributeVector & attribute,
+                                                   search::AttributeVector::DocId docId)
+     : AttributeInserter(attribute, docId),
+       _fieldX(document::PositionDataType::getInstance().getField(document::PositionDataType::FIELD_X)),
+       _fieldY(document::PositionDataType::getInstance().getField(document::PositionDataType::FIELD_Y))
+ {
+ }
+
+ // Primitive values are deliberately ignored by the position inserter;
+ // positions are handled as whole structs in onStructStart().
+ void
+ SearchVisitor::PositionInserter::onPrimitive(const IteratorContent &)
+ {
+ }
+
+ // Handles the start of a struct value: reads the X and Y fields of the
+ // struct, z-curve encodes the pair and adds the encoded value (with the
+ // weight of the visited content) to the attribute.
+ void
+ SearchVisitor::PositionInserter::onStructStart(const IteratorContent & c)
+ {
+     const document::StructuredFieldValue & position =
+         static_cast<const document::StructuredFieldValue &>(c.getValue());
+     LOG(debug, "PositionInserter: Adding value '%s'(%d) to attribute '%s' for docid '%d'",
+         position.toString().c_str(), c.getWeight(), _attribute.getName().c_str(), _docId);
+
+     // Fetch the coordinates into the reusable member values.
+     position.getValue(_fieldX, _valueX);
+     position.getValue(_fieldY, _valueY);
+
+     int64_t encoded = vespalib::geo::ZCurve::encode(_valueX.getValue(), _valueY.getValue());
+     LOG(debug, "X=%d, Y=%d, zcurve=%ld", _valueX.getValue(), _valueY.getValue(), encoded);
+     _attribute.getExtendInterface()->add(encoded, c.getWeight());
+ }
+
+ // Registers the attributes hinted by the index environment.
+ //
+ // @param indexEnv         source of the hinted attribute names and field info.
+ // @param rank             true to use the rank hints, false to use the dump hints.
+ // @param attrMan          attribute manager used to look up attribute guards.
+ // @param attributeFields  list that is extended with attributes not already
+ //                         present (matched on field id).
+ void
+ SearchVisitor::RankController::processHintedAttributes(const IndexEnvironment & indexEnv, bool rank,
+                                                        const search::IAttributeManager & attrMan,
+                                                        std::vector<AttrInfo> & attributeFields)
+ {
+     const std::set<vespalib::string> & hinted = (rank ? indexEnv.getHintedRankAttributes() : indexEnv.getHintedDumpAttributes());
+     for (const vespalib::string & attrName : hinted) {
+         LOG(debug, "Process attribute access hint (%s): '%s'", rank ? "rank" : "dump", attrName.c_str());
+         const search::fef::FieldInfo * info = indexEnv.getFieldByName(attrName);
+         if (info == NULL) {
+             LOG(warning, "Cannot locate field '%s' in the index environment. Ignore access hint about this attribute",
+                 attrName.c_str());
+             continue;
+         }
+
+         // Skip attributes whose field id has already been registered.
+         uint32_t fieldId = info->id();
+         bool alreadyRegistered = false;
+         for (const AttrInfo & known : attributeFields) {
+             if (known._field == fieldId) {
+                 alreadyRegistered = true;
+                 break;
+             }
+         }
+         if (alreadyRegistered) {
+             continue;
+         }
+
+         search::AttributeGuard::UP guard(attrMan.getAttribute(attrName));
+         if (guard->valid()) {
+             LOG(debug, "Add attribute '%s' with field id '%u' to the list of needed attributes", attrName.c_str(), fieldId);
+             attributeFields.push_back(AttrInfo(fieldId, std::move(guard)));
+         } else {
+             LOG(warning, "Cannot locate attribute '%s' in the attribute manager. "
+                 "Ignore access hint about this attribute", attrName.c_str());
+         }
+     }
+ }
+
+ // Creates a controller using the "default" rank profile, with no rank
+ // manager snapshot or rank setup yet, and with ranking and feature dumping
+ // both switched off.
+ SearchVisitor::RankController::RankController()
+     : _rankProfile("default"),
+       _rankManagerSnapshot(NULL),
+       _rankSetup(NULL),
+       _queryProperties(),
+       _hasRanking(false),
+       _rankProcessor(),
+       _dumpFeatures(false),
+       _dumpProcessor()
+ {
+ }
+
+ // Prepares ranking for the current rank profile.
+ //
+ // Resolves the rank setup from the rank manager snapshot, registers the
+ // attributes hinted for ranking, and creates and initializes the rank
+ // processor. When feature dumping is enabled the attributes hinted for
+ // dumping are registered as well and a separate dump processor is created.
+ // Finally marks the controller as having ranking.
+ void
+ SearchVisitor::RankController::setupRankProcessors(search::Query & query,
+                                                    const vespalib::string & location,
+                                                    size_t wantedHitCount,
+                                                    const search::IAttributeManager & attrMan,
+                                                    std::vector<AttrInfo> & attributeFields)
+ {
+     _rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile);
+
+     // Attribute vectors needed while ranking.
+     const IndexEnvironment & profileIndexEnv = _rankManagerSnapshot->getIndexEnvironment(_rankProfile);
+     processHintedAttributes(profileIndexEnv, true, attrMan, attributeFields);
+
+     _rankProcessor.reset(new RankProcessor(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan));
+     LOG(debug, "Initialize rank processor");
+     _rankProcessor->initForRanking(wantedHitCount);
+
+     if (_dumpFeatures) {
+         // Attribute vectors needed while dumping features.
+         processHintedAttributes(profileIndexEnv, false, attrMan, attributeFields);
+
+         _dumpProcessor.reset(new RankProcessor(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan));
+         LOG(debug, "Initialize dump processor");
+         _dumpProcessor->initForDumping(wantedHitCount);
+     }
+
+     _hasRanking = true;
+ }
+
+
+ // Unpacks match data for the given local document id in the rank processor
+ // and, when feature dumping is enabled, in the dump processor too.
+ void
+ SearchVisitor::RankController::onDocumentMatch(uint32_t docId)
+ {
+     _rankProcessor->unpackMatchData(docId);
+     if (!_dumpFeatures) {
+         return;
+     }
+     _dumpProcessor->unpackMatchData(docId);
+ }
+
+ // Runs the rank program for a matched document. When feature dumping is
+ // enabled the dump processor's rank program is run as well.
+ void
+ SearchVisitor::RankController::rankMatchedDocument(uint32_t docId)
+ {
+     _rankProcessor->runRankProgram(docId);
+     LOG(debug, "Rank score for matched document %u: %f",
+         _rankProcessor->getMatchData().getDocId(),
+         _rankProcessor->getRankScore());
+     if (!_dumpFeatures) {
+         return;
+     }
+     _dumpProcessor->runRankProgram(docId);
+     // The score is copied over so that the hit collector of the dump processor
+     // keeps exactly the same hits as the hit collector of the rank processor.
+     _dumpProcessor->setRankScore(_rankProcessor->getRankScore());
+ }
+
+ // Tells whether the last ranked document should be kept, i.e. its rank
+ // score is not at or below the rank score drop limit.
+ bool
+ SearchVisitor::RankController::keepMatchedDocument()
+ {
+     // Written as a negated "<=" on purpose: a NaN score compares false and
+     // is therefore kept.
+     const bool atOrBelowDropLimit = (_rankProcessor->getRankScore() <= _rankSetup->getRankScoreDropLimit());
+     return !atOrBelowDropLimit;
+ }
+
+ // Offers the matched document to the hit collector of the rank processor.
+ //
+ // @param hasSorting     true if a sort blob must accompany the hit.
+ // @param visitor        visitor used to serialize the sort blob.
+ // @param tmpSortBuffer  buffer the visitor serializes the sort blob into.
+ // @param document       the matched document.
+ // @return true if the hit collector reported the hit as among the best.
+ //
+ // When the hit is among the best and feature dumping is enabled, a hit
+ // without a document is also added to the dump processor's hit collector.
+ bool
+ SearchVisitor::RankController::collectMatchedDocument(bool hasSorting,
+                                                       SearchVisitor & visitor,
+                                                       const std::vector<char> & tmpSortBuffer,
+                                                       const vsm::StorageDocument::SP & document)
+ {
+     bool amongTheBest(false);
+     if (!hasSorting) {
+         amongTheBest = _rankProcessor->getHitCollector().addHit(document, _rankProcessor->getMatchData(), _rankProcessor->getRankScore());
+         if (amongTheBest && _dumpFeatures) {
+             _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore());
+         }
+     } else {
+         // fillSortBuffer() may resize the buffer, so its address is only
+         // taken after the call.
+         size_t pos = visitor.fillSortBuffer();
+         // pos is a size_t, so use the matching %zu conversion.
+         LOG(spam, "SortBlob is %zu bytes", pos);
+         amongTheBest = _rankProcessor->getHitCollector().addHit(document, _rankProcessor->getMatchData(), _rankProcessor->getRankScore(),
+                                                                 &tmpSortBuffer[0], pos);
+         if (amongTheBest && _dumpFeatures) {
+             _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore(),
+                                                      &tmpSortBuffer[0], pos);
+         }
+     }
+     return amongTheBest;
+ }
+
+ // Finishes ranking after visiting is done. Does nothing unless ranking was
+ // set up. Otherwise fills the search result from the hit collector, and
+ // hands summary features (if the rank setup has any) and rank features (if
+ // feature dumping is enabled) to the docsums state callback.
+ void
+ SearchVisitor::RankController::onCompletedVisiting(vsm::GetDocsumsStateCallback & docsumsStateCallback,
+                                                    vdslib::SearchResult & searchResult)
+ {
+     if (!_hasRanking) {
+         return;
+     }
+
+     // Move the collected hits into the search result.
+     _rankProcessor->fillSearchResult(searchResult);
+
+     // Summary features.
+     if (!_rankSetup->getSummaryFeatures().empty()) {
+         LOG(debug, "Calculate summary features");
+         search::FeatureSet::SP summaryFeatures = _rankProcessor->calculateFeatureSet();
+         docsumsStateCallback.setSummaryFeatures(summaryFeatures);
+     }
+
+     // Rank features.
+     if (_dumpFeatures) {
+         LOG(debug, "Calculate rank features");
+         search::FeatureSet::SP rankFeatures = _dumpProcessor->calculateFeatureSet();
+         docsumsStateCallback.setRankFeatures(rankFeatures);
+     }
+ }
+
+SearchVisitor::SyntheticFieldsController::SyntheticFieldsController() :
+ _documentIdFId(StringFieldIdTMap::npos)
+{
+}
+
+ // Resolves the field id of the synthetic "documentid" field from the field
+ // registry and asserts that it exists. The second argument is unused.
+ void
+ SearchVisitor::SyntheticFieldsController::setup(const StringFieldIdTMap & fieldRegistry,
+                                                 const StringFieldIdTMap &)
+ {
+     _documentIdFId = fieldRegistry.fieldNo("documentid");
+     assert(_documentIdFId != StringFieldIdTMap::npos);
+ }
+
+ // Hook invoked for every document; currently a no-op.
+ void
+ SearchVisitor::SyntheticFieldsController::onDocument(vsm::StorageDocument &)
+ {
+ }
+
+ // Stores the document id as a string field value in the document, under the
+ // field id resolved by setup().
+ void
+ SearchVisitor::SyntheticFieldsController::onDocumentMatch(vsm::StorageDocument & document,
+                                                           const vespalib::string & documentId)
+ {
+     document::FieldValue::UP idValue(new document::StringFieldValue(documentId));
+     document.setField(_documentIdFId, std::move(idValue));
+ }
+
+ // Appends to fieldList the fields needed beyond those in the fields config:
+ // for each docsum field spec its output name and all its input names (plus
+ // the name with the z-curve part cut off when an input name is a z-curve
+ // field name), followed by the fields used during sorting.
+ void
+ SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec,
+                                         std::vector<vespalib::string> & fieldList)
+ {
+     for (const vsm::DocsumTools::FieldSpec & spec : docsumSpec) {
+         fieldList.push_back(spec.getOutputName());
+         for (const vespalib::string & inputName : spec.getInputNames()) {
+             fieldList.push_back(inputName);
+             if (document::PositionDataType::isZCurveFieldName(inputName)) {
+                 fieldList.push_back(document::PositionDataType::cutZCurveFieldName(inputName));
+             }
+         }
+     }
+     // Fields used during sorting.
+     fieldList.push_back("[docid]");
+     fieldList.push_back("[rank]");
+     fieldList.push_back("documentid");
+ }
+
+ // Builds the field search spec map and the field searchers for the query.
+ //
+ // @param additionalFields  extra field names to add to the name -> id mapping.
+ // @param fieldsInQuery     filled with the name -> id mapping of the fields
+ //                          used in the query.
+ void
+ SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields,
+                                    StringFieldIdTMap & fieldsInQuery)
+ {
+     // 1) Mappings from the fields config: field name -> field id,
+     //    field id -> search spec, index name -> list of field ids.
+     _fieldSearchSpecMap.buildFromConfig(_vsmAdapter->getFieldsConfig());
+
+     // 2) Extend the field name -> field id mapping with the extra fields.
+     _fieldSearchSpecMap.buildFromConfig(additionalFields);
+
+     // 3) Let the query reconfigure the field searchers.
+     _fieldSearchSpecMap.reconfigFromQuery(_query);
+
+     // 4) Field name -> field id for every field used in the query.
+     _fieldSearchSpecMap.buildFieldsInQuery(_query, fieldsInQuery);
+
+     // 5) Hook the query's field names up to field searchers.
+     _fieldSearchSpecMap.buildSearcherMap(fieldsInQuery.map(), _fieldSearcherMap);
+
+     // 6) Prepare the field searchers.
+     _fieldSearcherMap.prepare(_fieldSearchSpecMap.documentTypeMap(), _searchBuffer, _query);
+ }
+
+ // Sets up the snippet modifier manager from the leaf terms of the query,
+ // the field search specs and the index mapping of the first document type.
+ //
+ // @throws vespalib::IllegalStateException if the document type map is empty.
+ void
+ SearchVisitor::setupSnippetModifiers()
+ {
+     // The first entry of the document type map is dereferenced below, so an
+     // empty map must be rejected here. The same check and exception exist in
+     // setupScratchDocument(), but that function is not guaranteed to run
+     // before this one.
+     if (_fieldSearchSpecMap.documentTypeMap().empty()) {
+         throw vespalib::IllegalStateException("Illegal config: There must be at least 1 document type in the 'vsmfields' config");
+     }
+     search::QueryTermList qtl;
+     _query.getLeafs(qtl);
+     _snippetModifierManager.setup(qtl, _fieldSearchSpecMap.specMap(), _fieldSearchSpecMap.documentTypeMap().begin()->second);
+ }
+
+ // Initializes the document type mapping used for scratch documents.
+ //
+ // The field set is the union of the fields in the query and every field in
+ // the name -> id registry; the first document type of the config is used.
+ //
+ // @throws vespalib::IllegalStateException if no document type is configured.
+ void
+ SearchVisitor::setupScratchDocument(const StringFieldIdTMap & fieldsInQuery)
+ {
+     const size_t numDocTypes = _fieldSearchSpecMap.documentTypeMap().size();
+     if (_fieldSearchSpecMap.documentTypeMap().empty()) {
+         throw vespalib::IllegalStateException("Illegal config: There must be at least 1 document type in the 'vsmfields' config");
+     }
+     // More than one document type is tolerated, but only the first is used.
+     if (numDocTypes != 1) {
+         LOG(warning, "We have %zd document types in the vsmfields config when we expected 1. Using the first one",
+             _fieldSearchSpecMap.documentTypeMap().size());
+     }
+
+     // Start from the query fields and add the registry fields not yet present.
+     _fieldsUnion = fieldsInQuery.map();
+     vsm::StringFieldIdTMapT::const_iterator entry(_fieldSearchSpecMap.nameIdMap().map().begin());
+     const vsm::StringFieldIdTMapT::const_iterator entryEnd(_fieldSearchSpecMap.nameIdMap().map().end());
+     while (entry != entryEnd) {
+         if (_fieldsUnion.find(entry->first) == _fieldsUnion.end()) {
+             LOG(debug, "Adding field '%s' from _fieldSearchSpecMap", entry->first.c_str());
+             _fieldsUnion[entry->first] = entry->second;
+         }
+         ++entry;
+     }
+
+     // Initialize from the default document type and the field name -> id mapping.
+     _docTypeMapping.init(_fieldSearchSpecMap.documentTypeMap().begin()->first,
+                          _fieldsUnion, *_component.getTypeRepo());
+     _docTypeMapping.prepareBaseDoc(_fieldPathMap);
+ }
+
+ // Wires up the objects used for document summary generation.
+ //
+ // Creates the docsum filter (backed by the rank processor's hit collector)
+ // and hands it to the summary generator. If docsum tools are available the
+ // docsum state is initialized through the docsum writer, and every valid
+ // attribute referenced by that state is registered in _attributeFields
+ // unless its field id is already there.
+ void
+ SearchVisitor::setupDocsumObjects()
+ {
+     std::unique_ptr<DocsumFilter> filter(new DocsumFilter(_vsmAdapter->getDocsumTools(), _rankController.getRankProcessor()->getHitCollector()));
+     filter->init(_fieldSearchSpecMap.nameIdMap(), *_fieldPathMap);
+     filter->setSnippetModifiers(_snippetModifierManager.getModifiers());
+     _summaryGenerator.setFilter(std::move(filter));
+
+     if (_vsmAdapter->getDocsumTools().get() == NULL) {
+         LOG(warning, "No docsum tools available");
+         return;
+     }
+
+     GetDocsumsState * state(&_summaryGenerator.getDocsumState());
+     _vsmAdapter->getDocsumTools()->getDocsumWriter()->InitState(_attrMan, state);
+     _summaryGenerator.setDocsumWriter(*_vsmAdapter->getDocsumTools()->getDocsumWriter());
+
+     for (const IAttributeVector * vec : state->_attributes) {
+         if (vec == NULL) {
+             continue;
+         }
+         vespalib::string name(vec->getName());
+         vsm::FieldIdT fid = _fieldSearchSpecMap.nameIdMap().fieldNo(name);
+         if (fid == StringFieldIdTMap::npos) {
+             LOG(warning, "No field with name '%s'. Odd ....", name.c_str());
+             continue;
+         }
+         AttributeGuard::UP guard(_attrMan.getAttribute(name));
+         if (!guard->valid()) {
+             LOG(warning, "Attribute '%s' is not valid", name.c_str());
+             continue;
+         }
+         // Only register the attribute if its field id is not known yet.
+         bool known = false;
+         for (size_t j(0); !known && (j < _attributeFields.size()); ++j) {
+             known = (_attributeFields[j]._field == fid);
+         }
+         if (!known) {
+             _attributeFields.push_back(AttrInfo(fid, std::move(guard)));
+         }
+     }
+ }
+
+ // Creates one attribute vector per non-empty field path and adds it to the
+ // attribute manager.
+ //
+ // The attribute name is the field path's element names joined with '.'.
+ // A multi-value attribute is created when the path contains an array, map
+ // or weighted set data type; the last such element on the path decides
+ // which kind. Otherwise a single-value attribute is created.
+ void
+ SearchVisitor::setupAttributeVectors()
+ {
+     enum Multiplicity {
+         SINGLE = 0,
+         ARRAY_LIKE,
+         WEIGHTED_SET
+     };
+
+     const FieldPathMapT & paths = *_fieldPathMap;
+     for (FieldPathMapT::const_iterator path(paths.begin()), pathEnd(paths.end()); path != pathEnd; ++path) {
+         if (path->empty()) {
+             continue;
+         }
+
+         // Build the dotted attribute name.
+         vespalib::string attrName(path->front().getName());
+         for (FieldPath::const_iterator step(path->begin()+1), stepEnd(path->end()); step != stepEnd; ++step) {
+             attrName.append(".");
+             attrName.append(step->getName());
+         }
+
+         // Classify the path; arrays and maps are treated alike.
+         Multiplicity multiplicity = SINGLE;
+         for (FieldPath::const_iterator step(path->begin()), stepEnd(path->end()); step != stepEnd; ++step) {
+             int typeId(step->getDataType().getClass().id());
+             if ((typeId == document::ArrayDataType::classId) || (typeId == document::MapDataType::classId)) {
+                 multiplicity = ARRAY_LIKE;
+             } else if (typeId == document::WeightedSetDataType::classId) {
+                 multiplicity = WEIGHTED_SET;
+             }
+         }
+
+         const document::FieldValue & fv = path->back().getFieldValueToSet();
+         AttributeVector::SP attr;
+         switch (multiplicity) {
+         case ARRAY_LIKE:
+             attr = createMultiValueAttribute(attrName, fv, true);
+             break;
+         case WEIGHTED_SET:
+             attr = createMultiValueAttribute(attrName, fv, false);
+             break;
+         default:
+             attr = createAttribute(attrName, fv);
+             break;
+         }
+
+         if (!attr.get()) {
+             LOG(debug, "Cannot setup attribute for field '%s' with data type '%s' (%s). Aggregation and sorting will not work for this field",
+                 attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name());
+             continue;
+         }
+         LOG(debug, "Adding attribute '%s' for field '%s' with data type '%s' (%s)",
+             attr->getName().c_str(), attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name());
+         if ( ! _attrMan.add(attr) ) {
+             LOG(warning, "Failed adding attribute '%s' for field '%s' with data type '%s' (%s)",
+                 attr->getName().c_str(), attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name());
+         }
+     }
+ }
+
+ // Registers the attributes needed by the sort specification.
+ //
+ // For each sort entry that names a known field with a valid, single-value
+ // attribute: an already registered attribute (same field id) gets its sort
+ // order and converter updated, otherwise a new entry is appended. The index
+ // of the entry in _attributeFields is then appended to _sortList. Entries
+ // that cannot be used are skipped with a warning.
+ void
+ SearchVisitor::setupAttributeVectorsForSorting(const search::common::SortSpec & sortList)
+ {
+     if (sortList.empty()) {
+         LOG(debug, "No sort specification received");
+         return;
+     }
+     const size_t numSortEntries(sortList.size());
+     for (size_t i(0); i < numSortEntries; i++) {
+         const search::common::SortInfo & sInfo(sortList[i]);
+         vsm::FieldIdT fid = _fieldSearchSpecMap.nameIdMap().fieldNo(sInfo._field);
+         if (fid == StringFieldIdTMap::npos) {
+             LOG(warning, "Cannot locate field '%s' in field name registry", sInfo._field.c_str());
+             continue;
+         }
+         AttributeGuard::UP guard(_attrMan.getAttribute(sInfo._field));
+         if (!guard->valid()) {
+             LOG(warning, "Attribute '%s' is not valid", sInfo._field.c_str());
+             continue;
+         }
+         if ((*guard)->hasMultiValue()) {
+             LOG(warning, "Attribute '%s' is not sortable", sInfo._field.c_str());
+             continue;
+         }
+
+         // Find the first registered attribute with this field id, if any.
+         const size_t numKnown(_attributeFields.size());
+         size_t index(numKnown);
+         for (size_t j(0); j < numKnown; j++) {
+             if (_attributeFields[j]._field == fid) {
+                 index = j;
+                 break;
+             }
+         }
+         if (index != numKnown) {
+             _attributeFields[index]._ascending = sInfo._ascending;
+             _attributeFields[index]._converter = sInfo._converter.get();
+         } else {
+             _attributeFields.push_back(AttrInfo(fid, std::move(guard), sInfo._ascending, sInfo._converter.get()));
+         }
+         _sortList.push_back(index);
+     }
+ }
+
+ // Deserializes the groupings in the given blob and registers those that
+ // can be configured.
+ //
+ // The blob starts with the number of groupings, followed by that many
+ // serialized Grouping objects. A grouping is added to _groupingList unless
+ // it is an "all" grouping that contains hits aggregators. Exceptions thrown
+ // while configuring a grouping are logged and that grouping is dropped.
+ void
+ SearchVisitor::setupGrouping(const std::vector<char> & groupingBlob)
+ {
+     // data() is well defined also for an empty vector, unlike &blob[0].
+     vespalib::nbostream iss(groupingBlob.data(), groupingBlob.size());
+     vespalib::NBOSerializer is(iss);
+     uint32_t numGroupings(0);
+     is >> numGroupings;
+     for(size_t i(0); i < numGroupings; i++) {
+         std::unique_ptr<Grouping> ag(new Grouping());
+         ag->deserialize(is);
+         GroupingList::value_type groupingPtr(ag.release());
+         Grouping & grouping = *groupingPtr;
+         Attribute2DocumentAccessor attr2Doc;
+         grouping.select(attr2Doc, attr2Doc);
+         // i is a size_t, so use the matching %zu conversion.
+         LOG(debug, "Grouping # %zu with id(%d)", i, grouping.getId());
+         try {
+             search::expression::ConfigureStaticParams stuff(_attrCtx.get(), &_docTypeMapping.getCurrentDocumentType());
+             grouping.configureStaticStuff(stuff);
+             HitsResultPreparator preparator(_summaryGenerator);
+             grouping.select(preparator, preparator);
+             grouping.preAggregate(false);
+             if (!grouping.getAll() || (preparator.getNumHitsAggregators() == 0)) {
+                 _groupingList.push_back(groupingPtr);
+             } else {
+                 LOG(warning, "You can not collect hits with an all aggregator yet.");
+             }
+         } catch (const std::exception & e) {
+             LOG(error, "Could not locate attribute for grouping number %zu : %s", i, e.what());
+         }
+     }
+ }
+
+ // Docsum cache that holds a single document and returns it for every
+ // document id that is asked for.
+ class SingleDocumentStore : public vsm::IDocSumCache
+ {
+ public:
+     // explicit: the store keeps a reference to doc, so it must never be
+     // created through an implicit conversion.
+     explicit SingleDocumentStore(const vsm::StorageDocument & doc) : _doc(doc) { }
+     // Returns the wrapped document; docId is ignored.
+     virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const {
+         (void) docId;
+         return _doc;
+     }
+ private:
+     // Not owned; the referenced document must outlive this store.
+     const vsm::StorageDocument & _doc;
+ };
+
+ // Two document types are compatible when they are the same object or when
+ // they have the same name.
+ bool
+ SearchVisitor::compatibleDocumentTypes(const document::DocumentType& typeA,
+                                        const document::DocumentType& typeB) const
+ {
+     const bool sameObject = (&typeA == &typeB);
+     return sameObject || (typeA.getName() == typeB.getName());
+ }
+
+ // Processes one block of document entries.
+ //
+ // Initializes the visitor lazily on first use and returns immediately if
+ // the rank controller is not valid. Each entry is wrapped in a
+ // StorageDocument; documents whose type is not compatible with the default
+ // document type are skipped, the others are passed to handleDocument().
+ // Documents that handleDocument() asks to keep are stored in
+ // _backingDocuments. Exceptions thrown while handling a document are logged
+ // and processing continues with the next entry.
+ // The bucket id and the hit counter are not used here.
+ void
+ SearchVisitor::handleDocuments(const document::BucketId&,
+                                std::vector<spi::DocEntry::LP>& entries,
+                                HitCounter& hitCounter)
+ {
+     (void) hitCounter;
+     if (_vsmAdapter == NULL) {
+         init(_params);
+     }
+     if ( ! _rankController.valid() ) {
+         //Prevent continuing with bad config.
+         return;
+     }
+     LOG(debug, "SearchVisitor '%s' handling block of %zu documents",
+         _id.c_str(), entries.size());
+     size_t highestFieldNo(_fieldSearchSpecMap.nameIdMap().highestFieldNo());
+
+     const document::DocumentType* defaultDocType =
+         _docTypeMapping.getDefaultDocumentType();
+     assert(defaultDocType);
+     for (size_t i = 0; i< entries.size(); ++i) {
+         spi::DocEntry& entry(*entries[i]);
+         vsm::StorageDocument::SP document(
+                 new StorageDocument(entry.releaseDocument()));
+         document->fieldPathMap(_fieldPathMap);
+         document->setFieldCount(highestFieldNo);
+
+         try {
+             document->init();
+             // The null check is kept because the assert above is compiled
+             // out when NDEBUG is defined.
+             if (defaultDocType != NULL
+                 && !compatibleDocumentTypes(*defaultDocType,
+                                             document->docDoc().getType()))
+             {
+                 LOG(debug, "Skipping document of type '%s' when "
+                     "handling only documents of type '%s'",
+                     document->docDoc().getType().getName().c_str(),
+                     defaultDocType->getName().c_str());
+             } else {
+                 if (handleDocument(document)) {
+                     _backingDocuments.push_back(document);
+                 }
+             }
+         } catch (const std::exception & e) {
+             LOG(warning, "Caught exception handling document '%s'. Exception='%s'",
+                 document->docDoc().getId().getScheme().toString().c_str(),
+                 e.what());
+         }
+     }
+ }
+
+ // Runs a single document through grouping, matching, ranking and hit
+ // collection.
+ //
+ // @return true if the document ended up among the best hits and therefore
+ //         must be kept alive by the caller; false if it did not match, was
+ //         rejected by the rank score drop limit, or was not among the best.
+ bool
+ SearchVisitor::handleDocument(const vsm::StorageDocument::SP & document)
+ {
+     _syntheticFieldsController.onDocument(*document);
+     // Groupings configured for all documents see the document before matching.
+     group(document->docDoc(), 0, true);
+
+     if (!match(*document)) {
+         LOG(debug, "Did not match document with id '%s'", document->docDoc().getId().getScheme().toString().c_str());
+         return false;
+     }
+
+     RankProcessor & rankProcessor = *_rankController.getRankProcessor();
+     vespalib::string documentId(document->docDoc().getId().getScheme().toString());
+     LOG(debug, "Matched document with id '%s'", documentId.c_str());
+
+     document->setDocId(rankProcessor.getMatchData().getDocId());
+
+     fillAttributeVectors(documentId, *document);
+
+     _rankController.rankMatchedDocument(rankProcessor.getMatchData().getDocId());
+
+     if (_shouldFillRankAttribute) {
+         _rankAttribute.add(rankProcessor.getRankScore());
+     }
+
+     if (!_rankController.keepMatchedDocument()) {
+         _hitsRejectedCount++;
+         LOG(debug, "Do not keep document with id '%s' because rank score (%f) <= rank score drop limit (%f)",
+             documentId.c_str(),
+             rankProcessor.getRankScore(),
+             _rankController.getRankSetup()->getRankScoreDropLimit());
+         return false;
+     }
+
+     bool amongTheBest = _rankController.collectMatchedDocument(!_sortList.empty(), *this, _tmpSortBuffer, document);
+
+     _syntheticFieldsController.onDocumentMatch(*document, documentId);
+
+     // Groupings for matched documents get this single document as docsum cache.
+     SingleDocumentStore single(*document);
+     _summaryGenerator.setDocsumCache(single);
+     group(document->docDoc(), rankProcessor.getRankScore(), false);
+
+     if (!amongTheBest) {
+         return false;
+     }
+     document->saveCachedFields();
+     return true;
+ }
+
+ // Aggregates the document into every grouping whose "all" setting equals
+ // the given flag.
+ //
+ // @param doc   the document to aggregate.
+ // @param rank  the rank passed along to the aggregation.
+ // @param all   selects which groupings take part (those where getAll() == all).
+ void
+ SearchVisitor::group(const document::Document & doc, search::HitRank rank, bool all)
+ {
+     LOG(spam, "Group all: %s", all ? "true" : "false");
+     for (GroupingEntry & entry : _groupingList) {
+         if (all != entry->getAll()) {
+             continue;
+         }
+         entry.aggregate(doc, rank);
+         LOG(spam, "Actually group document with id '%s'", doc.getId().getScheme().toString().c_str());
+     }
+ }
+
+ // Runs all field searchers over the document and evaluates the query.
+ //
+ // On a hit the hit counter is incremented and the rank controller is told
+ // about the match, using (hit count - 1) as the local document id. The
+ // searched-documents counter is always incremented and the query is reset
+ // before returning.
+ //
+ // @return true if the query matched the document.
+ bool
+ SearchVisitor::match(const vsm::StorageDocument & doc)
+ {
+     const FieldIdTSearcherMap::iterator searchersEnd = _fieldSearcherMap.end();
+     for (FieldIdTSearcherMap::iterator searcher = _fieldSearcherMap.begin(); searcher != searchersEnd; ++searcher) {
+         (*(*searcher)).search(doc);
+     }
+
+     const bool isHit(_query.evaluate());
+     if (isHit) {
+         _hitCount++;
+         LOG(spam, "Match in doc %d", doc.getDocId());
+
+         // The local docId of this hit is its zero-based hit number.
+         _rankController.onDocumentMatch(_hitCount - 1);
+     }
+     _docSearchedCount++;
+     _query.reset();
+     return isHit;
+ }
+
+ // Adds a new document to every registered attribute vector and fills it
+ // with the matching field value of the given document.
+ //
+ // Integer attributes with a z-curve field name are treated as positions:
+ // the value is read from the field named by the attribute name with the
+ // z-curve part cut off, and inserted through a PositionInserter. When a
+ // field has no value, the "[docid]" attribute receives the document id and
+ // the "[rank]" attribute makes the visitor fill the rank attribute later.
+ //
+ // @param documentId  the id of the document, as a string.
+ // @param document    the document to read field values from.
+ void
+ SearchVisitor::fillAttributeVectors(const vespalib::string & documentId, const StorageDocument & document)
+ {
+     for (size_t i(0), im(_attributeFields.size()); i < im; i++) {
+         const AttrInfo & finfo = _attributeFields[i];
+         const AttributeGuard &finfoGuard(*finfo._attr);
+         bool isPosition = finfoGuard->getClass().inherits(search::IntegerAttribute::classId) && document::PositionDataType::isZCurveFieldName(finfoGuard->getName());
+         LOG(debug, "Filling attribute '%s', isPosition='%s'", finfoGuard->getName().c_str(), isPosition ? "true" : "false");
+         uint32_t fieldId = finfo._field;
+         if (isPosition) {
+             vespalib::stringref org = document::PositionDataType::cutZCurveFieldName(finfoGuard->getName());
+             // Never dereference the lookup result unchecked: if the original
+             // position field is unknown, fall back to the attribute's own
+             // field id instead of dereferencing end().
+             auto orgField = _fieldsUnion.find(org);
+             if (orgField != _fieldsUnion.end()) {
+                 fieldId = orgField->second;
+             } else {
+                 LOG(warning, "Cannot locate the position field for attribute '%s'. Using the field id of the attribute itself",
+                     finfoGuard->getName().c_str());
+             }
+         }
+         const StorageDocument::SubDocument & subDoc = document.getComplexField(fieldId);
+         search::AttributeVector & attrV = const_cast<search::AttributeVector & >(*finfoGuard);
+         search::AttributeVector::DocId docId(0);
+         // A document is always added, also when there is no value to insert.
+         attrV.addDoc(docId);
+         if (subDoc.getFieldValue() != NULL) {
+             LOG(debug, "value = '%s'", subDoc.getFieldValue()->toString().c_str());
+             if (isPosition) {
+                 LOG(spam, "Position");
+                 PositionInserter pi(attrV, docId);
+                 subDoc.getFieldValue()->iterateNested(subDoc.begin(), subDoc.end(), pi);
+             } else {
+                 AttributeInserter ai(attrV, docId);
+                 subDoc.getFieldValue()->iterateNested(subDoc.begin(), subDoc.end(), ai);
+             }
+         } else if (finfoGuard->getName() == "[docid]") {
+             _documentIdAttribute.add(documentId.c_str());
+             // assert((_docsumCache.cache().size() + 1) == _documentIdAttribute.getNumDocs());
+         } else if (finfoGuard->getName() == "[rank]") {
+             _shouldFillRankAttribute = true;
+         }
+     }
+ }
+
+ // Serializes the sort data of the most recently added document, for every
+ // attribute in the sort list, back to back into _tmpSortBuffer.
+ //
+ // The buffer is grown and the serialization retried whenever an attribute
+ // reports (by returning -1) that the remaining space is too small.
+ //
+ // @return the number of bytes written to _tmpSortBuffer.
+ size_t
+ SearchVisitor::fillSortBuffer()
+ {
+     // Smallest size the buffer is grown to. Doubling an empty buffer would
+     // leave it empty and make the retry loop below spin forever.
+     const size_t minSortBufferSize(256);
+     size_t pos(0);
+     for(size_t i(0), m(_sortList.size()); i != m; i++) {
+         int written(0);
+         const AttrInfo & finfo = _attributeFields[_sortList[i]];
+         const AttributeGuard &finfoGuard(*finfo._attr);
+         LOG(debug, "Adding sortdata for document %d for attribute '%s'",
+             finfoGuard->getNumDocs() - 1, finfoGuard->getName().c_str());
+ //        assert((_docsumCache.cache().size() + 1) == finfo._attr->getNumDocs());
+         do {
+             // data() is well defined also when the buffer is empty.
+             if (finfo._ascending) {
+                 written = finfoGuard->serializeForAscendingSort(finfoGuard->getNumDocs()-1, _tmpSortBuffer.data()+pos, _tmpSortBuffer.size() - pos, finfo._converter);
+             } else {
+                 written = finfoGuard->serializeForDescendingSort(finfoGuard->getNumDocs()-1, _tmpSortBuffer.data()+pos, _tmpSortBuffer.size() - pos, finfo._converter);
+             }
+             if (written == -1) {
+                 const size_t grown = _tmpSortBuffer.empty() ? minSortBufferSize : _tmpSortBuffer.size()*2;
+                 _tmpSortBuffer.resize(grown);
+             }
+         } while (written == -1);
+         pos += written;
+     }
+     return pos;
+ }
+
+ // Called when a bucket has been completed; only logs the event.
+ void
+ SearchVisitor::completedBucket(const document::BucketId&, HitCounter&)
+ {
+     LOG(debug, "Completed bucket");
+ }
+
+ // Builds the final query result once all documents have been visited.
+ //
+ // Initializes the visitor lazily if needed, lets the rank controller fill
+ // the search result, sorts it and sets the total hit count (hits minus
+ // rejected hits), reports every hit to the hit counter, and then generates
+ // grouping results and document summaries. The backing documents are
+ // released once the summaries have been generated.
+ void
+ SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
+ {
+     if (_vsmAdapter == NULL) {
+         init(_params);
+     }
+     LOG(debug, "Completed visiting");
+     vdslib::SearchResult & result(_queryResult->getSearchResult());
+     vdslib::DocumentSummary & summaries(_queryResult->getDocumentSummary());
+     LOG(debug, "Hit count: %lu", result.getHitCount());
+
+     _rankController.onCompletedVisiting(_summaryGenerator.getDocsumCallback(), result);
+     LOG(debug, "Hit count: %lu", result.getHitCount());
+
+     /// Sorting is possible from here on; hits are no longer in documentid access order.
+     result.sort();
+     result.setTotalHitCount(_hitCount - _hitsRejectedCount);
+
+     // Report every hit in the (sorted) result to the hit counter.
+     const char* hitDocId;
+     vdslib::SearchResult::RankType hitRank;
+     for (uint32_t hit = 0; hit < result.getHitCount(); hit++) {
+         result.getHit(hit, hitDocId, hitRank);
+         hitCounter.addHit(document::DocumentId(hitDocId), 0);
+     }
+
+     generateGroupingResults();
+
+     generateDocumentSummaries();
+     _backingDocuments.clear();
+
+     summaries.sort();
+     LOG(debug, "Docsum count: %lu", summaries.getSummaryCount());
+ }
+
+ // Finalizes the query result and sends it as a document message. The
+ // result object is released from _queryResult when it is sent.
+ void
+ SearchVisitor::completedVisiting(HitCounter& hitCounter)
+ {
+     completedVisitingInternal(hitCounter);
+     documentapi::DocumentMessage::UP reply(_queryResult.release());
+     sendMessage(std::move(reply));
+ }
+
+ // Finishes every registered grouping (post-aggregate, post-merge, sort by
+ // id), serializes it and adds the serialized blob to the grouping list of
+ // the search result under the grouping's id.
+ void
+ SearchVisitor::generateGroupingResults()
+ {
+     vdslib::SearchResult & result(_queryResult->getSearchResult());
+     for (GroupingEntry & entry : _groupingList) {
+         Grouping & grouping(*entry);
+         LOG(debug, "grouping before postAggregate: %s", grouping.asString().c_str());
+         grouping.postAggregate();
+         grouping.postMerge();
+         grouping.sortById();
+         LOG(debug, "grouping after postAggregate: %s", grouping.asString().c_str());
+
+         // Serialize and copy the bytes into a blob of exactly that size.
+         vespalib::nbostream serialized;
+         vespalib::NBOSerializer serializer(serialized);
+         grouping.serialize(serializer);
+         vespalib::MallocPtr blob(serialized.size());
+         memcpy(blob, serialized.c_str(), serialized.size());
+         result.getGroupingList().add(grouping.getId(), blob);
+     }
+ }
+
+ // Generates a document summary for each hit in the search result, up to
+ // the wanted hit count, and adds it to the document summary of the query
+ // result. Does nothing if the rank controller is not valid.
+ void
+ SearchVisitor::generateDocumentSummaries()
+ {
+     if ( ! _rankController.valid()) {
+         return;
+     }
+     // Summaries are read from the documents kept by the hit collector.
+     _summaryGenerator.setDocsumCache(_rankController.getRankProcessor()->getHitCollector());
+     vdslib::SearchResult & searchResult(_queryResult->getSearchResult());
+     vdslib::DocumentSummary & documentSummary(_queryResult->getDocumentSummary());
+     for (size_t i(0), m(searchResult.getHitCount()); (i < m) && (i < searchResult.getWantedHitCount()); i++ ) {
+         const char * docId(NULL);
+         vdslib::SearchResult::RankType rank(0);
+         uint32_t lid = searchResult.getHit(i, docId, rank);
+         vespalib::ConstBufferRef docsum = _summaryGenerator.fillSummary(lid, _summaryClass);
+         documentSummary.addSummary(docId, docsum.c_str(), docsum.size());
+         // i is a size_t, so use the matching %zu conversion.
+         LOG(debug, "Adding summary %zu: globalDocId(%s), localDocId(%u), rank(%f), bytes(%lu)",
+             i, docId, lid, rank, docsum.size());
+     }
+ }
+
+
+}
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
new file mode 100644
index 00000000000..bc8d72c4177
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -0,0 +1,464 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vsm/common/docsum.h>
+#include <vespa/vsm/common/documenttypemapping.h>
+#include <vespa/vsm/common/storagedocument.h>
+#include <vespa/vsm/searcher/fieldsearcher.h>
+#include <vespa/vsm/vsm/docsumfilter.h>
+#include <vespa/vsm/vsm/fieldsearchspec.h>
+#include <vespa/vsm/vsm/snippetmodifier.h>
+#include <vespa/vsm/vsm/vsm-adapter.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/storage/visiting/visitor.h>
+#include <vespa/documentapi/messagebus/messages/queryresultmessage.h>
+#include "hitcollector.h"
+#include "indexenvironment.h"
+#include "queryenvironment.h"
+#include "rankmanager.h"
+#include "rankprocessor.h"
+#include "searchenvironment.h"
+
+using namespace search::aggregation;
+
+namespace storage {
+
+/**
+ * @class storage::SearchVisitor
+ *
+ * @brief Visitor that applies a search query to visitor data and
+ * converts them to a SearchResultCommand and a DocumentSummaryCommand.
+ *
+ * Apart from the constructor and destructor, everything declared here is
+ * private, including the overrides of the Visitor callbacks.
+ **/
+class SearchVisitor : public Visitor {
+public:
+ SearchVisitor(StorageComponent&, VisitorEnvironment& vEnv,
+ const vdslib::Parameters & params);
+
+ ~SearchVisitor();
+private:
+ /**
+ * This struct wraps an attribute vector.
+ * It stores the field id, the sort direction, an optional blob converter
+ * and the guard that was moved in at construction.
+ **/
+ struct AttrInfo {
+ public:
+ /**
+ * Construct a new object that is marked ascending and has no converter.
+ *
+ * @param fid the field id of the attribute field.
+ * @param attr a guard to the attribute vector.
+ **/
+ AttrInfo(vsm::FieldIdT fid, search::AttributeGuard::UP attr) :
+ _field(fid),
+ _ascending(true),
+ _converter(NULL),
+ _attr(std::move(attr))
+ {
+ }
+ /**
+ * Construct a new object.
+ *
+ * @param fid the field id of the attribute field.
+ * @param attr a guard to the attribute vector.
+ * @param ascending whether this attribute should be sorted ascending or not.
+ * @param converter is a converter to apply to the attribute before sorting.
+ * Stored as a raw pointer; this struct does not delete it.
+ **/
+ AttrInfo(vsm::FieldIdT fid, search::AttributeGuard::UP attr, bool ascending, const search::common::BlobConverter * converter) :
+ _field(fid),
+ _ascending(ascending),
+ _converter(converter),
+ _attr(std::move(attr))
+ {
+ }
+ vsm::FieldIdT _field;
+ bool _ascending;
+ const search::common::BlobConverter * _converter;
+ search::AttributeGuard::UP _attr;
+ };
+
+ /**
+ * This class gets callbacks when iterating through a field value and
+ * inserts the values into a given attribute vector.
+ * It holds a reference to the attribute vector and the doc id given at construction.
+ **/
+ class AttributeInserter : public document::FieldValue::IteratorHandler {
+ protected:
+ typedef document::FieldValue::IteratorHandler::Content IteratorContent;
+ search::AttributeVector & _attribute;
+ search::AttributeVector::DocId _docId;
+
+ virtual void onPrimitive(const IteratorContent & c);
+
+ public:
+ AttributeInserter(search::AttributeVector & attribute, search::AttributeVector::DocId docId);
+ };
+
+ class PositionInserter : public AttributeInserter { // adds onStructStart() plus X/Y field and int value members; presumably handles position structs - confirm in the .cpp
+ public:
+ PositionInserter(search::AttributeVector & attribute, search::AttributeVector::DocId docId);
+ private:
+ virtual void onPrimitive(const IteratorContent & c);
+ virtual void onStructStart(const Content & fv);
+ document::Field _fieldX;
+ document::Field _fieldY;
+ document::IntFieldValue _valueX;
+ document::IntFieldValue _valueY;
+ };
+
+ /**
+ * This class controls all the ranking related objects.
+ **/
+ class RankController {
+ private:
+ vespalib::string _rankProfile;
+ RankManager::Snapshot::SP _rankManagerSnapshot;
+ const search::fef::RankSetup * _rankSetup;
+ search::fef::Properties _queryProperties;
+ bool _hasRanking;
+ RankProcessor::UP _rankProcessor;
+ bool _dumpFeatures;
+ RankProcessor::UP _dumpProcessor;
+
+ /**
+ * Process attribute hints and add needed attributes to the given list.
+ **/
+ void processHintedAttributes(const IndexEnvironment & indexEnv, bool rank,
+ const search::IAttributeManager & attrMan,
+ std::vector<AttrInfo> & attributeFields);
+
+ public:
+ RankController();
+ bool valid() const { return _rankProcessor.get() != NULL; } // true when _rankProcessor is non-null
+ void setRankProfile(const vespalib::string &rankProfile) { _rankProfile = rankProfile; }
+ const vespalib::string &getRankProfile() const { return _rankProfile; }
+ void setRankManagerSnapshot(const RankManager::Snapshot::SP & snapshot) { _rankManagerSnapshot = snapshot; }
+ search::fef::Properties & getQueryProperties() { return _queryProperties; }
+ RankProcessor * getRankProcessor() { return _rankProcessor.get(); } // may be null; see valid()
+ void setDumpFeatures(bool dumpFeatures) { _dumpFeatures = dumpFeatures; }
+ bool getDumpFeatures() const { return _dumpFeatures; }
+ const search::fef::RankSetup * getRankSetup() const { return _rankSetup; }
+
+ /**
+ * Setup rank processors used for ranking and dumping.
+ *
+ * @param query the query associated with the search visitor.
+ * @param location location string supplied by the caller; its format is not visible in this header.
+ * @param wantedHitCount number of hits wanted.
+ * @param attrMan the attribute manager.
+ * @param attributeFields the list of attribute vectors needed.
+ **/
+ void setupRankProcessors(search::Query & query,
+ const vespalib::string & location,
+ size_t wantedHitCount,
+ const search::IAttributeManager & attrMan,
+ std::vector<AttrInfo> & attributeFields);
+ /**
+ * Callback function that is called for each document that match.
+ * Unpack match data.
+ *
+ * @param docId the docId to use for this hit
+ **/
+ void onDocumentMatch(uint32_t docId);
+
+ /**
+ * Calculate rank for a matched document.
+ *
+ * @param docId the docId of the matched document.
+ **/
+ void rankMatchedDocument(uint32_t docId);
+
+ /**
+ * Returns whether we should keep the matched document.
+ * Use the rank-score-drop-limit to decide this.
+ **/
+ bool keepMatchedDocument();
+
+ /**
+ * Collect a matched document in the hit collector.
+ * Take sort spec into consideration if used.
+ *
+ * @param hasSorting whether the search result should be sorted.
+ * @param visitor the search visitor.
+ * @param tmpSortBuffer the sort buffer containing the sort data.
+ * @param documentId the document to collect; despite the name this is a
+ * vsm::StorageDocument::SP, not a document id string.
+ * @return true if the document was added to the heap
+ **/
+ bool collectMatchedDocument(bool hasSorting,
+ SearchVisitor & visitor,
+ const std::vector<char> & tmpSortBuffer,
+ const vsm::StorageDocument::SP & documentId);
+ /**
+ * Callback function that is called when visiting is completed.
+ * Perform second phase ranking and calculate summary features / rank features if asked for.
+ *
+ * @param docsumsStateCallback state object to store summary features and rank features.
+ * @param searchResult the search result handed to the implementation; how it is used is not visible in this header.
+ **/
+ void onCompletedVisiting(vsm::GetDocsumsStateCallback & docsumsStateCallback, vdslib::SearchResult & searchResult);
+ };
+
+ /**
+ * This class controls all the synthetic fields
+ **/
+ class SyntheticFieldsController {
+ private:
+ vsm::FieldIdT _documentIdFId;
+
+ public:
+ SyntheticFieldsController();
+
+ /**
+ * Setup synthetic fields, like 'sddocname' and 'documentid'.
+ *
+ * @param fieldRegistry mapping from field name to field id for all known fields.
+ * @param fieldsInQuery mapping from field name to field id for fields mentioned in the query.
+ **/
+ void setup(const vsm::StringFieldIdTMap & fieldRegistry,
+ const vsm::StringFieldIdTMap & fieldsInQuery);
+
+ /**
+ * Callback function that is called for each document received.
+ *
+ * @param document the document received.
+ **/
+ void onDocument(vsm::StorageDocument & document);
+
+ /**
+ * Callback function that is called for each document matched.
+ *
+ * @param document the document matched.
+ * @param documentId the document id of the matched document.
+ **/
+ void onDocumentMatch(vsm::StorageDocument & document,
+ const vespalib::string & documentId);
+ };
+
+ /**
+ * Register field names from the given docsum spec into the given field name list.
+ * These field names are in addition to the field names found in the vsmfields config.
+ * Duplicates are removed when later building mapping from field name to field id.
+ *
+ * @param docsumSpec config with the field names used by the docsum setup.
+ * @param fieldList list of field names that are built.
+ **/
+ void registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec,
+ std::vector<vespalib::string> & fieldList);
+
+ /**
+ * Setup the field searchers used when matching the query with the stream of documents.
+ * This includes setting up various mappings in FieldSearchSpecMap and building mapping
+ * for fields used by the query.
+ *
+ * @param additionalFields list of additional field names used when setting up the mappings.
+ * @param fieldsInQuery mapping from field name to field id that are built based on the query.
+ **/
+ void setupFieldSearchers(const std::vector<vespalib::string> & additionalFields,
+ vsm::StringFieldIdTMap & fieldsInQuery);
+
+ /**
+ * Setup snippet modifiers for the fields where we have substring search.
+ * The modifiers will be used when generating docsum.
+ **/
+ void setupSnippetModifiers();
+
+ /**
+ * Setup the scratch document that is used when receiving a stream of documents through the visitor api.
+ * Each document in this stream is serialized into the scratch document and passed to vsm for matching.
+ *
+ * @param fieldsInQuery mapping from field name to field id for fields mentioned in the query.
+ **/
+ void setupScratchDocument(const vsm::StringFieldIdTMap & fieldsInQuery);
+
+ /**
+ * Setup the objects used for document summary.
+ **/
+ void setupDocsumObjects();
+
+ /**
+ * Create and register an attribute vector in the attribute manager for each field value in the scratch document.
+ * If later needed during evaluation, these attribute vectors are filled with the actual
+ * value(s) from the scratch document.
+ **/
+ void setupAttributeVectors();
+
+ /**
+ * Setup attribute vectors needed for sorting.
+ *
+ * @param sortList the list of attributes needed for sorting.
+ **/
+ void setupAttributeVectorsForSorting(const search::common::SortSpec & sortList);
+
+ /**
+ * Setup grouping based on the given grouping blob.
+ *
+ * @param groupingBlob the binary representation of the grouping specification.
+ **/
+ void setupGrouping(const std::vector<char> & groupingBlob);
+
+ // Inherit doc from Visitor
+ void handleDocuments(const document::BucketId&,
+ std::vector<spi::DocEntry::LP>& entries,
+ HitCounter& hitCounter) override;
+
+ bool compatibleDocumentTypes(const document::DocumentType& typeA,
+ const document::DocumentType& typeB) const;
+
+ /**
+ * Process one document
+ * @param document Document to process.
+ * @return true if the underlying buffer is needed later on, then it must be kept.
+ */
+ bool handleDocument(const vsm::StorageDocument::SP & document);
+
+ /**
+ * Collect the given document for grouping.
+ *
+ * @param doc the document used for grouping.
+ * @param rank the rank value supplied together with the document.
+ * @param all whether we should group all documents, not just hits.
+ **/
+ void group(const document::Document & doc, search::HitRank rank, bool all);
+
+ /**
+ * Check if the given document matches the query.
+ *
+ * @param doc the document to match.
+ * @return whether the document matched the query.
+ **/
+ bool match(const vsm::StorageDocument & doc);
+
+ /**
+ * Fill attribute vectors needed for aggregation and sorting with values from the scratch document.
+ *
+ * @param documentId the document id of the matched document.
+ * @param document the storage document passed along with the id (presumably the source of the values - confirm).
+ **/
+ void fillAttributeVectors(const vespalib::string & documentId, const vsm::StorageDocument & document);
+
+ /**
+ * Fill the sort buffer based on the attribute vectors needed for sorting.
+ *
+ * @return the position of the sort buffer.
+ **/
+ size_t fillSortBuffer();
+
+ // Inherit doc from Visitor
+ void completedBucket(const document::BucketId&, HitCounter& counter) override;
+
+ // Inherit doc from Visitor
+ void completedVisiting(HitCounter& counter) override;
+
+ spi::ReadConsistency getRequiredReadConsistency() const override {
+ // Searches are not considered to require strong consistency.
+ return spi::ReadConsistency::WEAK;
+ }
+
+ /**
+ * Required to be called at least once.
+ */
+ void completedVisitingInternal(HitCounter& counter);
+
+ /**
+ * Generate grouping results from the new grouping framework (if any) and add them to the search result.
+ **/
+ void generateGroupingResults();
+
+ /**
+ * Generate document summaries for a specified subset of the hits.
+ **/
+ void generateDocumentSummaries();
+
+ class GroupingEntry : std::shared_ptr<Grouping> { // NOTE(review): privately inherits shared_ptr while also holding _grouping; the accessors below only use the member
+ public:
+ GroupingEntry(Grouping * grouping);
+ void aggregate(const document::Document & doc, search::HitRank rank);
+ const Grouping & operator * () const { return *_grouping; }
+ Grouping & operator * () { return *_grouping; }
+ const Grouping * operator -> () const { return _grouping.get(); }
+ private:
+ std::shared_ptr<Grouping> _grouping;
+ size_t _count;
+ size_t _limit;
+ };
+ typedef std::vector< GroupingEntry > GroupingList;
+ typedef std::vector<vsm::StorageDocument::SP> DocumentVector;
+
+ class SummaryGenerator : public HitsAggregationResult::SummaryGenerator // holds the docsum state, callback, filter, writer and raw buffer declared below
+ {
+ public:
+ SummaryGenerator();
+ GetDocsumsState & getDocsumState() { return _docsumState; }
+ vsm::GetDocsumsStateCallback & getDocsumCallback() { return _callback; }
+ void setFilter(std::unique_ptr<vsm::DocsumFilter> filter) { _docsumFilter = std::move(filter); } // takes ownership of the filter
+ void setDocsumCache(const vsm::IDocSumCache & cache) { _docsumFilter->setDocSumStore(cache); } // NOTE(review): dereferences _docsumFilter; assumes a filter has been installed first - confirm
+ void setDocsumWriter(IDocsumWriter & docsumWriter) { _docsumWriter = & docsumWriter; } // stores the address only; no ownership is taken
+ virtual vespalib::ConstBufferRef fillSummary(search::AttributeVector::DocId lid, const HitsAggregationResult::SummaryClassType & summaryClass);
+ private:
+ vsm::GetDocsumsStateCallback _callback;
+ GetDocsumsState _docsumState;
+ std::unique_ptr<vsm::DocsumFilter> _docsumFilter;
+ search::docsummary::IDocsumWriter * _docsumWriter;
+ search::RawBuf _rawBuf;
+ };
+
+ class HitsResultPreparator : public vespalib::ObjectOperation, public vespalib::ObjectPredicate // keeps a SummaryGenerator reference and a hits-aggregator counter that starts at 0
+ {
+ public:
+ HitsResultPreparator(SummaryGenerator & summaryGenerator) :
+ _summaryGenerator(summaryGenerator),
+ _numHitsAggregators(0)
+ { }
+ size_t getNumHitsAggregators() const { return _numHitsAggregators; }
+ private:
+ virtual void execute(vespalib::Identifiable &obj);
+ virtual bool check(const vespalib::Identifiable &obj) const;
+ SummaryGenerator & _summaryGenerator;
+ size_t _numHitsAggregators;
+ };
+
+ void init(const vdslib::Parameters & params);
+ SearchEnvironment & _env;
+ vdslib::Parameters _params;
+ const vsm::VSMAdapter * _vsmAdapter;
+ size_t _docSearchedCount;
+ size_t _hitCount;
+ size_t _hitsRejectedCount;
+ search::Query _query;
+ std::unique_ptr<documentapi::QueryResultMessage> _queryResult;
+ vsm::FieldIdTSearcherMap _fieldSearcherMap;
+ vsm::SharedFieldPathMap _fieldPathMap;
+ vsm::DocumentTypeMapping _docTypeMapping;
+ vsm::FieldSearchSpecMap _fieldSearchSpecMap;
+ vsm::SnippetModifierManager _snippetModifierManager;
+ SummaryGenerator _summaryGenerator;
+ vespalib::string _summaryClass;
+ search::AttributeManager _attrMan;
+ search::attribute::IAttributeContext::UP _attrCtx;
+ GroupingList _groupingList;
+ std::vector<AttrInfo> _attributeFields;
+ search::common::SortSpec _sortSpec;
+ std::vector<size_t> _sortList;
+ IDocsumWriter * _docsumWriter;
+ vsm::SharedSearcherBuf _searchBuffer;
+ std::vector<char> _tmpSortBuffer;
+ search::AttributeVector::SP _documentIdAttributeBacking;
+ search::AttributeVector::SP _rankAttributeBacking;
+ search::SingleStringExtAttribute & _documentIdAttribute; // reference member; presumably bound to _documentIdAttributeBacking - confirm in the constructor
+ search::SingleFloatExtAttribute & _rankAttribute; // reference member; presumably bound to _rankAttributeBacking - confirm in the constructor
+ bool _shouldFillRankAttribute;
+ SyntheticFieldsController _syntheticFieldsController;
+ RankController _rankController;
+ DocumentVector _backingDocuments;
+ vsm::StringFieldIdTMapT _fieldsUnion;
+};
+
+/**
+ * Visitor factory that holds a config::ConfigUri member and declares
+ * makeVisitorEnvironment() and makeVisitor(), which return a
+ * VisitorEnvironment::UP and a Visitor* respectively.
+ * Only the constructor is public.
+ **/
+class SearchVisitorFactory : public VisitorFactory {
+public:
+ SearchVisitorFactory(const config::ConfigUri & configUri);
+
+private:
+ config::ConfigUri _configUri;
+
+ VisitorEnvironment::UP makeVisitorEnvironment(StorageComponent&);
+ Visitor* makeVisitor(StorageComponent&, VisitorEnvironment&env, const vdslib::Parameters& params);
+};
+
+}
+
diff --git a/streamingvisitors/src/vespa/snippetvisitor/.gitignore b/streamingvisitors/src/vespa/snippetvisitor/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/streamingvisitors/src/vespa/snippetvisitor/.gitignore