aboutsummaryrefslogtreecommitdiffstats
path: root/streamingvisitors/src/vespa/vsm
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2022-05-15 00:40:43 +0200
committer GitHub <noreply@github.com> 2022-05-15 00:40:43 +0200
commit dacf557add1c6a3ffab036cdf2f7dfdf9750b22e (patch)
tree 3a9dfff58b98898e2e28c0337925f4f04e5eaeb0 /streamingvisitors/src/vespa/vsm
parent 2722ce9d1d1ec12d57ebd3833ce37b0958afb752 (diff)
Revert "Collapse vsm into streamingvisitors"
Diffstat (limited to 'streamingvisitors/src/vespa/vsm')
-rw-r--r--streamingvisitors/src/vespa/vsm/.gitignore3
-rw-r--r--streamingvisitors/src/vespa/vsm/common/.gitignore5
-rw-r--r--streamingvisitors/src/vespa/vsm/common/CMakeLists.txt10
-rw-r--r--streamingvisitors/src/vespa/vsm/common/charbuffer.cpp32
-rw-r--r--streamingvisitors/src/vespa/vsm/common/charbuffer.h52
-rw-r--r--streamingvisitors/src/vespa/vsm/common/docsum.h22
-rw-r--r--streamingvisitors/src/vespa/vsm/common/document.cpp73
-rw-r--r--streamingvisitors/src/vespa/vsm/common/document.h68
-rw-r--r--streamingvisitors/src/vespa/vsm/common/documenttypemapping.cpp104
-rw-r--r--streamingvisitors/src/vespa/vsm/common/documenttypemapping.h54
-rw-r--r--streamingvisitors/src/vespa/vsm/common/fieldmodifier.cpp24
-rw-r--r--streamingvisitors/src/vespa/vsm/common/fieldmodifier.h58
-rw-r--r--streamingvisitors/src/vespa/vsm/common/storagedocument.cpp81
-rw-r--r--streamingvisitors/src/vespa/vsm/common/storagedocument.h59
-rw-r--r--streamingvisitors/src/vespa/vsm/config/.gitignore4
-rw-r--r--streamingvisitors/src/vespa/vsm/config/CMakeLists.txt11
-rw-r--r--streamingvisitors/src/vespa/vsm/config/vsm-cfif.h25
-rw-r--r--streamingvisitors/src/vespa/vsm/config/vsm.def13
-rw-r--r--streamingvisitors/src/vespa/vsm/config/vsmfields.def31
-rw-r--r--streamingvisitors/src/vespa/vsm/config/vsmsummary.def21
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/.gitignore5
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/CMakeLists.txt28
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp56
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.h21
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp301
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h147
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp70
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.h53
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/fold.cpp153
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/fold.h12
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp310
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.h26
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp78
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.h28
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp49
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.h33
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp56
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.h22
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp33
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h25
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp69
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h35
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp56
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.h25
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp320
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h138
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp59
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.h23
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp144
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.h72
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.cpp54
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h25
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/.gitignore5
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt14
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/docsumconfig.cpp75
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/docsumconfig.h29
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp35
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h72
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp477
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/docsumfilter.h90
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp334
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h98
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.cpp45
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.h36
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/i_matching_elements_filler.h24
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp220
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h57
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.cpp136
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.h110
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp194
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h132
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.hpp18
72 files changed, 0 insertions, 5477 deletions
diff --git a/streamingvisitors/src/vespa/vsm/.gitignore b/streamingvisitors/src/vespa/vsm/.gitignore
deleted file mode 100644
index 4c5f5d9ef7a..00000000000
--- a/streamingvisitors/src/vespa/vsm/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-.depend
-Makefile
-/libvsm.so.5.1
diff --git a/streamingvisitors/src/vespa/vsm/common/.gitignore b/streamingvisitors/src/vespa/vsm/common/.gitignore
deleted file mode 100644
index 95bc02923a9..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*.exe
-*.ilk
-*.pdb
-.depend*
-Makefile
diff --git a/streamingvisitors/src/vespa/vsm/common/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/common/CMakeLists.txt
deleted file mode 100644
index 4570a9b581e..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_library(vsm_vsmcommon OBJECT
- SOURCES
- charbuffer.cpp
- document.cpp
- documenttypemapping.cpp
- fieldmodifier.cpp
- storagedocument.cpp
- DEPENDS
-)
diff --git a/streamingvisitors/src/vespa/vsm/common/charbuffer.cpp b/streamingvisitors/src/vespa/vsm/common/charbuffer.cpp
deleted file mode 100644
index b8fbb5c8846..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/charbuffer.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "charbuffer.h"
-#include <cstring>
-
-namespace vsm {
-
-// Creates a buffer holding len bytes; the write position starts at 0.
-CharBuffer::CharBuffer(size_t len)
-    : _buffer(len), _pos(0)
-{ }
-
-// Appends n bytes from src at the current position and advances the position.
-void CharBuffer::put(const char * src, size_t n)
-{
-    if (n == 0) { return; } // nothing to copy; also avoids indexing an empty or exactly full buffer
-    if (n > getRemaining()) {
-        resize(_pos + (n * 2)); // room for this write plus n spare bytes
-    }
-    memcpy(_buffer.data() + _pos, src, n);
-    _pos += n;
-}
-
-// Grows the buffer to len bytes; the buffer is never shrunk.
-void
-CharBuffer::resize(size_t len)
-{
-    if (len <= getLength()) { return; }
-    _buffer.resize(len);
-}
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/common/charbuffer.h b/streamingvisitors/src/vespa/vsm/common/charbuffer.h
deleted file mode 100644
index 08618a9b973..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/charbuffer.h
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vector>
-#include <memory>
-
-namespace vsm {
-
-/**
- * Simple growable char buffer with a write position.
- **/
-class CharBuffer
-{
-private:
-    std::vector<char> _buffer; // backing storage; its size is the buffer length
-    size_t            _pos;    // offset at which the next put() writes
-
-public:
-    using SP = std::shared_ptr<CharBuffer>;
-
-    /**
-     * Creates a char buffer with len bytes.
-     **/
-    CharBuffer(size_t len = 0);
-
-    /**
-     * Copies n bytes from the src array into the underlying buffer at the
-     * current position, and updates the position accordingly.
-     * Resizing will occur if needed.
-     **/
-    void put(const char * src, size_t n);
-
-    /**
-     * Resizes the buffer so that the new length becomes len.
-     * Resizing will not occur if len <= current length.
-     **/
-    void resize(size_t len);
-
-    /**
-     * Resets the position to the beginning of the buffer.
-     **/
-    void reset() { _pos = 0; }
-
-    const char * getBuffer() const { return _buffer.data(); } // data(), not &_buffer[0]: indexing an empty vector is undefined
-    size_t getLength() const { return _buffer.size(); }
-    size_t getPos() const { return _pos; }
-    size_t getRemaining() const { return getLength() - getPos(); }
-    void put(char c) { put(&c, 1); }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/common/docsum.h b/streamingvisitors/src/vespa/vsm/common/docsum.h
deleted file mode 100644
index 49b84cb0783..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/docsum.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "document.h"
-
-namespace vsm {
-
-/**
- * Interface to a cache of document summaries. A summary is looked up by its
- * document id; per the original note, the actual docsums are meant to be
- * generated on the fly when requested.
- */
-
-class IDocSumCache
-{
-public:
-    virtual ~IDocSumCache() = default;
-    virtual const Document & getDocSum(const search::DocumentIdT & docId) const = 0;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/common/document.cpp b/streamingvisitors/src/vespa/vsm/common/document.cpp
deleted file mode 100644
index a345c82ce2d..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/document.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "document.h"
-#include <vespa/vespalib/stllike/asciistream.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
-
-using search::DocumentIdT;
-using search::TimeT;
-using document::FieldValue;
-
-namespace vsm
-{
-
-vespalib::asciistream & operator << (vespalib::asciistream & os, const FieldRef & f)
-{
-    os << f.size();
-    if (f.data() != nullptr) {
-        // A stringref need not be NUL-terminated: emit exactly size() bytes instead of a C string.
-        os.write(f.data(), f.size());
-    }
-    os << " : ";
-    return os;
-}
-
-vespalib::asciistream & operator << (vespalib::asciistream & os, const StringFieldIdTMap & f)
-{
-    for (const auto & entry : f._map) { // one "name = id" line per entry
-        os << entry.first << " = " << entry.second << '\n';
-    }
-    return os;
-}
-
-// Starts out with no registered field names.
-StringFieldIdTMap::StringFieldIdTMap()
-    : _map()
-{ }
-
-// Maps s to fieldId, replacing any id previously registered for s.
-void
-StringFieldIdTMap::add(const vespalib::string & s, FieldIdT fieldId)
-{ _map[s] = fieldId; }
-
-void StringFieldIdTMap::add(const vespalib::string & s)
-{
-    // insert() leaves an existing entry untouched, so a known name keeps its id
-    // and a new name gets the current map size as its id.
-    const FieldIdT nextId = _map.size();
-    _map.insert(StringFieldIdTMapT::value_type(s, nextId));
-}
-
-FieldIdT StringFieldIdTMap::fieldNo(const vespalib::string & fName) const
-{
-    const auto pos = _map.find(fName);
-    if (pos == _map.end()) { return npos; } // unknown field name
-    return pos->second;
-}
-
-// Despite the name this returns one more than the largest registered id (0 if empty).
-size_t StringFieldIdTMap::highestFieldNo() const
-{
-    size_t limit = 0;
-    for (const auto & entry : _map) {
-        const FieldIdT id = entry.second;
-        if (limit <= id) { limit = id + 1; }
-    }
-    return limit;
-}
-
-Document::~Document() = default; // out-of-line definition of the virtual destructor
-
-}
-
-VESPALIB_HASH_MAP_INSTANTIATE(vespalib::string, vsm::FieldIdTList);
-VESPALIB_HASH_MAP_INSTANTIATE(vespalib::string, vsm::IndexFieldMapT);
diff --git a/streamingvisitors/src/vespa/vsm/common/document.h b/streamingvisitors/src/vespa/vsm/common/document.h
deleted file mode 100644
index 8c11d27072b..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/document.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/searchlib/query/base.h>
-#include <vespa/document/fieldvalue/fieldvalue.h>
-#include <vespa/vespalib/stllike/hash_map.h>
-#include <map>
-
-namespace vespalib {
- class asciistream;
-}
-
-namespace vsm {
-
-/// Type to identify fields in documents.
-typedef unsigned int FieldIdT;
-/// A type to represent a list of FieldIds.
-typedef std::vector<FieldIdT> FieldIdTList;
-/// A type to represent all the fields contained in all the indexes.
-typedef vespalib::hash_map<vespalib::string, FieldIdTList> IndexFieldMapT;
-/// A type to represent all the fields contained in all the indexes in all the document types.
-typedef vespalib::hash_map<vespalib::string, IndexFieldMapT> DocumentTypeIndexFieldMapT;
-/// A type to represent a map from fieldname to fieldid.
-typedef std::map<vespalib::string, FieldIdT> StringFieldIdTMapT;
-
-class StringFieldIdTMap // maps field names to numeric field ids
-{
- public:
- enum { npos=0xFFFFFFFF }; // returned by fieldNo() for an unknown name
- StringFieldIdTMap();
- FieldIdT fieldNo(const vespalib::string & fName) const; // id of fName, or npos
- void add(const vespalib::string & s); // no-op if s is known, else assigns the current map size as id
- void add(const vespalib::string & s, FieldIdT fNo); // sets (or overwrites) the id of s
- const StringFieldIdTMapT & map() const { return _map; }
- size_t highestFieldNo() const; // one more than the largest registered id; 0 when empty
- friend vespalib::asciistream & operator << (vespalib::asciistream & os, const StringFieldIdTMap & f);
- private:
- StringFieldIdTMapT _map;
-};
-
-typedef vespalib::stringref FieldRef;
-
-/**
- Base class representing a document. It stores the document id and the
- maximum field count given at construction; access to the fields themselves
- is left to subclasses through getField()/setField().
-*/
-class Document
-{
- public:
- Document(size_t maxFieldCount) : _docId(0), _fieldCount(maxFieldCount) { }
- Document(search::DocumentIdT doc, size_t maxFieldCount) : _docId(doc), _fieldCount(maxFieldCount) { }
- virtual ~Document();
- const search::DocumentIdT & getDocId() const { return _docId; }
- size_t getFieldCount() const { return _fieldCount; }
- void setDocId(const search::DocumentIdT & v) { _docId = v; }
- virtual const document::FieldValue * getField(FieldIdT fId) const = 0;
- /**
- NOTE(review): this used to read "Returns true, if not possible to set", but StorageDocument::setField returns true when the value was stored - confirm the intended contract.
- */
- virtual bool setField(FieldIdT fId, document::FieldValue::UP fv) = 0;
- private:
- search::DocumentIdT _docId;
- const size_t _fieldCount;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.cpp b/streamingvisitors/src/vespa/vsm/common/documenttypemapping.cpp
deleted file mode 100644
index 7886c44b2e0..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "documenttypemapping.h"
-#include <vespa/document/repo/documenttyperepo.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.common.documenttypemapping");
-
-namespace vsm {
-
-// Starts with no field maps and no default document type.
-DocumentTypeMapping::DocumentTypeMapping()
-    : _fieldMap(), _defaultDocumentTypeName(),
-      _defaultDocumentType(nullptr), // same value as the value-initialisation it replaces
-      _documentTypeFreq()
-{ }
-
-DocumentTypeMapping::~DocumentTypeMapping() = default;
-
-namespace {
-// Builds the type id used as key into the field map: the document type name
-// followed by a hardcoded "0", since versions are not supported.
-vespalib::string getDocTypeId(const document::DocumentType & docType)
-{
-    vespalib::string id(docType.getName());
-    id += "0";
-    return id;
-}
-}
-
-// Resolves the default document type in repo and builds its field path map; throws std::runtime_error if it cannot be resolved.
-void DocumentTypeMapping::init(const vespalib::string & defaultDocumentType,
-                               const StringFieldIdTMapT & fieldList, const document::DocumentTypeRepo &repo)
-{
-    _defaultDocumentType = repo.getDocumentType(defaultDocumentType);
-    if (_defaultDocumentType == nullptr) { throw std::runtime_error("Default document type not found in repo."); } // NOTE(review): assumes nullptr signals an unknown type
-    _defaultDocumentTypeName = getDocTypeId(*_defaultDocumentType);
-    LOG(debug, "Setting default document type to '%s'", _defaultDocumentTypeName.c_str());
-    buildFieldMap(_defaultDocumentType, fieldList, _defaultDocumentTypeName);
-}
-
-// Gives the caller its own copy of the default type's field path map (or an
-// empty map when none is registered). Always returns true.
-bool DocumentTypeMapping::prepareBaseDoc(SharedFieldPathMap & map) const
-{
-    const auto entry = _fieldMap.find(_defaultDocumentTypeName);
-    if (entry == _fieldMap.end()) {
-        LOG(warning, "No FieldPathMap found for default document type '%s'. Using empty one", _defaultDocumentTypeName.c_str());
-        map = std::make_shared<FieldPathMapT>();
-        return true;
-    }
-    map = std::make_shared<FieldPathMapT>(entry->second);
-    LOG(debug, "Found FieldPathMap for default document type '%s' with %zd elements", _defaultDocumentTypeName.c_str(), map->size());
-    return true;
-}
-
-// Builds the field id -> field path table for docType and stores it in _fieldMap under typeId.
-void DocumentTypeMapping::buildFieldMap(const document::DocumentType *docTypePtr,
-                                        const StringFieldIdTMapT & fieldList, const vespalib::string & typeId)
-{
-    LOG(debug, "buildFieldMap: docType = '%s', fieldList.size = '%zd', typeId = '%s'",
-        docTypePtr->getName().c_str(), fieldList.size(), typeId.c_str());
-    const document::DocumentType & docType = *docTypePtr;
-    size_t slotCount(0); // the table is indexed by field id, so it needs (largest id + 1) slots
-    for (const auto & field : fieldList) {
-        slotCount = std::max(slotCount, size_t(field.second));
-    }
-    ++slotCount;
-    FieldPathMapT & fieldMap = _fieldMap[typeId];
-    fieldMap.resize(slotCount);
-    size_t validCount(0); // number of fields for which a field path could be built
-    for (const auto & field : fieldList) {
-        vespalib::string fname = field.first;
-        LOG(debug, "Handling %s -> %d", fname.c_str(), field.second);
-        try {
-            // Names starting with '[' and these fixed names are never looked up in the document type.
-            const bool skip = (fname[0] == '[') || (fname == "summaryfeatures") || (fname == "rankfeatures")
-                           || (fname == "ranklog") || (fname == "sddocname") || (fname == "documentid");
-            if (skip) { continue; }
-            FieldPath fieldPath;
-            docType.buildFieldPath(fieldPath, fname);
-            fieldMap[field.second] = std::move(fieldPath);
-            ++validCount;
-            LOG(spam, "Found %s -> %d in document", fname.c_str(), field.second);
-        } catch (const std::exception & e) {
-            LOG(debug, "Could not get field info for '%s' in documenttype '%s' (id = '%s') : %s",
-                field.first.c_str(), docType.getName().c_str(), typeId.c_str(), e.what());
-        }
-    }
-    _documentTypeFreq.insert(std::make_pair(validCount, docTypePtr));
-}
-
-const document::DocumentType & DocumentTypeMapping::getCurrentDocumentType() const
-{
-    if ( ! _documentTypeFreq.empty()) {
-        return *_documentTypeFreq.rbegin()->second; // last entry = largest key, i.e. most resolved fields
-    }
-    throw std::runtime_error("No document type registered yet.");
-}
-
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.h b/streamingvisitors/src/vespa/vsm/common/documenttypemapping.h
deleted file mode 100644
index 607b40cec47..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/common/storagedocument.h>
-
-namespace document { class DocumentTypeRepo; }
-
-namespace vsm
-{
-
-class DocumentTypeMapping
-{
-public:
- DocumentTypeMapping();
- ~DocumentTypeMapping();
-
- /**
- * Stores in doc a copy of the field path map registered for the default
- * document type (an empty map if none is registered). Always returns true.
- **/
- bool prepareBaseDoc(SharedFieldPathMap & doc) const;
-
- /**
- * Sets the default document type and builds the field path map for it.
- **/
- void init(const vespalib::string & defaultDocumentType,
- const StringFieldIdTMapT & fieldList,
- const document::DocumentTypeRepo &repo);
-
- const document::DocumentType & getCurrentDocumentType() const;
- const vespalib::string & getDefaultDocumentTypeName() const
- { return _defaultDocumentTypeName; }
- const document::DocumentType *getDefaultDocumentType() const
- { return _defaultDocumentType; }
-
-private:
- /**
- * Builds a field path map for the given type id. This is a
- * mapping from field id to field path for all field names in
- * the given list that the given document type can resolve.
- **/
- void buildFieldMap(const document::DocumentType *docType,
- const StringFieldIdTMapT & fieldList,
- const vespalib::string & typeId);
- typedef vespalib::hash_map<vespalib::string, FieldPathMapT> FieldPathMapMapT;
- typedef std::multimap<size_t, const document::DocumentType *> DocumentTypeUsage;
- FieldPathMapMapT _fieldMap; // type id -> field path map
- vespalib::string _defaultDocumentTypeName; // type id of the default document type (name + "0")
- const document::DocumentType *_defaultDocumentType;
- DocumentTypeUsage _documentTypeFreq; // keyed by the number of fields resolved for the type
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.cpp b/streamingvisitors/src/vespa/vsm/common/fieldmodifier.cpp
deleted file mode 100644
index b39afd83b5a..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "fieldmodifier.h"
-#include <vespa/vespalib/stllike/hash_map.hpp>
-
-namespace vsm {
-
-// Creates an empty modifier map.
-FieldModifierMap::FieldModifierMap()
-    : _map() { }
-
-FieldModifierMap::~FieldModifierMap() = default;
-
-// Looks up the modifier registered for fId; the map keeps ownership of it.
-FieldModifier * FieldModifierMap::getModifier(FieldIdT fId) const
-{
-    const auto entry = _map.find(fId);
-    if (entry != _map.end()) {
-        return entry->second.get();
-    }
-    return nullptr; // no modifier registered for this field id
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.h b/streamingvisitors/src/vespa/vsm/common/fieldmodifier.h
deleted file mode 100644
index 60e480fa237..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/document/fieldvalue/fieldvalue.h>
-#include <vespa/vsm/common/document.h>
-
-namespace vsm {
-
-/**
- * Interface for objects that turn a field value into a modified one.
- **/
-class FieldModifier
-{
-public:
-    using UP = std::unique_ptr<FieldModifier>;
-
-    /**
-     * Returns a new field value derived from fv; fv itself is passed as const.
-     **/
-    virtual document::FieldValue::UP modify(const document::FieldValue & fv) = 0;
-
-    /**
-     * Returns a new field value derived from fv, where the implementation
-     * is given the field path to use when iterating the field value.
-     **/
-    virtual document::FieldValue::UP modify(const document::FieldValue & fv,
-                                            const document::FieldPath & path) = 0;
-
-    virtual ~FieldModifier() = default;
-};
-
-typedef vespalib::hash_map<FieldIdT, FieldModifier::UP> FieldModifierMapT;
-
-/**
- * Owns the field modifiers, keyed by the id of the field each one applies to.
- **/
-class FieldModifierMap
-{
-    FieldModifierMapT _map; // private: class members are private by default
-
-public:
-    FieldModifierMap();
-    ~FieldModifierMap();
-    /// Direct access to the underlying map, mutable and read-only.
-    FieldModifierMapT & map() { return _map; }
-    const FieldModifierMapT & map() const { return _map; }
-
-    /**
-     * Finds the modifier registered for a field.
-     *
-     * @param fId the field id to look up.
-     * @return the modifier, or nullptr if none is registered for fId.
-     **/
-    FieldModifier * getModifier(FieldIdT fId) const;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/common/storagedocument.cpp b/streamingvisitors/src/vespa/vsm/common/storagedocument.cpp
deleted file mode 100644
index a0d666268f5..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/storagedocument.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "storagedocument.h"
-#include <vespa/document/fieldvalue/arrayfieldvalue.h>
-#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.storagedocument");
-
-using NestedIterator = document::FieldValue::PathRange;
-
-namespace vsm {
-
-// Takes ownership of doc and shares fim; one (initially empty) cache slot per field id below fieldNoLimit.
-StorageDocument::StorageDocument(document::Document::UP doc, const SharedFieldPathMap & fim, size_t fieldNoLimit)
-    : Document(fieldNoLimit),
-      _doc(std::move(doc)), _fieldMap(fim),
-      _cachedFields(fieldNoLimit), // equals getFieldCount(), which the base just stored
-      _backedFields()
-{ }
-
-StorageDocument::~StorageDocument() = default;
-
-namespace {
-    FieldPath _emptyFieldPath; // empty path whose range backs the placeholder below
-    StorageDocument::SubDocument _empySubDocument(nullptr, _emptyFieldPath.getFullRange()); // returned when a field has no field path
-}
-
-const StorageDocument::SubDocument &
-StorageDocument::getComplexField(FieldIdT fId) const
-{
-    if (fId >= _cachedFields.size()) { return _empySubDocument; } // id outside the cache: nothing to look up
-    SubDocument & cached = _cachedFields[fId];
-    if (cached.getFieldValue() != nullptr) { return cached; }
-    // The field map can be shorter than the cache (e.g. an empty map), so check before indexing it.
-    if ((fId >= _fieldMap->size()) || (*_fieldMap)[fId].empty()) {
-        LOG(debug, "Failed getting field fId %d.", fId);
-        return _empySubDocument;
-    }
-    const FieldPath & fp = (*_fieldMap)[fId];
-    const document::StructuredFieldValue * sfv = _doc.get();
-    NestedIterator nested = fp.getFullRange();
-    const document::FieldPathEntry& fvInfo = nested.cur();
-    if (sfv->getValue(fvInfo.getFieldRef(), fvInfo.getFieldValueToSet())) {
-        SubDocument tmp(&fvInfo.getFieldValueToSet(), nested.next());
-        cached.swap(tmp); // remember the resolved value for later lookups
-    }
-    return cached;
-}
-
-// Replaces every cached value pointer with a clone that is kept alive in _backedFields.
-void StorageDocument::saveCachedFields() const
-{
-    _backedFields.reserve(_cachedFields.size());
-    for (SubDocument & cached : _cachedFields) {
-        const document::FieldValue * current = cached.getFieldValue();
-        if (current == nullptr) { continue; }
-        _backedFields.emplace_back(document::FieldValue::UP(current->clone()));
-        cached.setFieldValue(_backedFields.back().get());
-    }
-}
-
-// Field value held by getComplexField(fId); null when that sub document has no value.
-const document::FieldValue * StorageDocument::getField(FieldIdT fId) const
-{
-    return getComplexField(fId).getFieldValue();
-}
-
-// Stores fv as the value of field fId and takes ownership of it; returns false (dropping fv) if fId is out of range.
-bool StorageDocument::setField(FieldIdT fId, document::FieldValue::UP fv)
-{
-    if (fId >= _cachedFields.size()) { return false; }
-    // The field map can be shorter than the cache (e.g. an empty map); only an end iterator is needed here.
-    const FieldPath & fp = (fId < _fieldMap->size()) ? (*_fieldMap)[fId] : _emptyFieldPath;
-    SubDocument tmp(fv.get(), NestedIterator(fp.end(), fp.end()));
-    _cachedFields[fId].swap(tmp);
-    _backedFields.emplace_back(std::move(fv));
-    return true;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/common/storagedocument.h b/streamingvisitors/src/vespa/vsm/common/storagedocument.h
deleted file mode 100644
index a7f21cb052f..00000000000
--- a/streamingvisitors/src/vespa/vsm/common/storagedocument.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "document.h"
-#include <vespa/document/fieldvalue/document.h>
-
-namespace vsm {
-
-typedef vespalib::CloneablePtr<document::FieldValue> FieldValueContainer;
-typedef document::FieldPath FieldPath; // field path to navigate a field value
-typedef std::vector<FieldPath> FieldPathMapT; // map from field id to field path
-typedef std::shared_ptr<FieldPathMapT> SharedFieldPathMap;
-
-class StorageDocument : public Document { // vsm::Document backed by a document::Document
-public:
- typedef std::unique_ptr<StorageDocument> UP;
-
- class SubDocument { // a field value pointer plus the field path range stored with it
- public:
- SubDocument() : _fieldValue(nullptr) {}
- SubDocument(document::FieldValue *fv, document::FieldValue::PathRange nested) :
- _fieldValue(fv),
- _range(nested)
- { }
-
- const document::FieldValue *getFieldValue() const { return _fieldValue; } // nullptr when no value is set
- void setFieldValue(document::FieldValue *fv) { _fieldValue = fv; }
- const document::FieldValue::PathRange & getRange() const { return _range; }
- void swap(SubDocument &rhs) { // exchanges both members with rhs
- std::swap(_fieldValue, rhs._fieldValue);
- std::swap(_range, rhs._range);
- }
- private:
- FieldPath::const_iterator begin() const; // NOTE(review): no definition visible in this view - confirm it is used
- FieldPath::const_iterator end() const; // NOTE(review): same as begin()
- document::FieldValue *_fieldValue; // raw pointer; this class never deletes it
- document::FieldValue::PathRange _range;
- };
-public:
- StorageDocument(document::Document::UP doc, const SharedFieldPathMap &fim, size_t fieldNoLimit);
- StorageDocument(const StorageDocument &) = delete;
- StorageDocument & operator = (const StorageDocument &) = delete;
- ~StorageDocument();
-
- const document::Document &docDoc() const { return *_doc; } // dereferences _doc; check valid() first
- bool valid() const { return _doc.get() != nullptr; }
- const SubDocument &getComplexField(FieldIdT fId) const; // resolves the field on first use and caches it
- const document::FieldValue *getField(FieldIdT fId) const override;
- bool setField(FieldIdT fId, document::FieldValue::UP fv) override ; // true when the value was stored
- void saveCachedFields() const; // re-points cached fields at clones owned by _backedFields
-private:
- document::Document::UP _doc;
- SharedFieldPathMap _fieldMap; // field id -> field path
- mutable std::vector<SubDocument> _cachedFields; // indexed by field id; filled by getComplexField()/setField()
- mutable std::vector<document::FieldValue::UP> _backedFields; // owns values added by setField()/saveCachedFields()
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/config/.gitignore b/streamingvisitors/src/vespa/vsm/config/.gitignore
deleted file mode 100644
index d58390943e2..00000000000
--- a/streamingvisitors/src/vespa/vsm/config/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-.depend
-Makefile
-config-*.cpp
-config-*.h
diff --git a/streamingvisitors/src/vespa/vsm/config/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/config/CMakeLists.txt
deleted file mode 100644
index fea0bafe6b2..00000000000
--- a/streamingvisitors/src/vespa/vsm/config/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_library(vsm_vconfig OBJECT
- SOURCES
- DEPENDS
-)
-vespa_generate_config(vsm_vconfig vsmfields.def)
-install_config_definition(vsmfields.def vespa.config.search.vsm.vsmfields.def)
-vespa_generate_config(vsm_vconfig vsm.def)
-install_config_definition(vsm.def vespa.config.search.vsm.vsm.def)
-vespa_generate_config(vsm_vconfig vsmsummary.def)
-install_config_definition(vsmsummary.def vespa.config.search.vsm.vsmsummary.def)
diff --git a/streamingvisitors/src/vespa/vsm/config/vsm-cfif.h b/streamingvisitors/src/vespa/vsm/config/vsm-cfif.h
deleted file mode 100644
index 22033aee232..00000000000
--- a/streamingvisitors/src/vespa/vsm/config/vsm-cfif.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/config/config-vsmfields.h>
-#include <vespa/vsm/config/config-vsm.h>
-#include <vespa/vsm/config/config-vsmsummary.h>
-#include <vespa/vespalib/util/ptrholder.h>
-
-using vespa::config::search::vsm::VsmConfig;
-using vespa::config::search::vsm::VsmsummaryConfig;
-using vespa::config::search::vsm::VsmfieldsConfig;
-
-namespace vsm {
-
-typedef vespalib::PtrHolder<VsmfieldsConfig> VsmfieldsHolder;
-typedef std::shared_ptr<VsmfieldsConfig> VsmfieldsHandle;
-
-typedef vespalib::PtrHolder<VsmConfig> VsmHolder;
-typedef std::shared_ptr<VsmConfig> VsmHandle;
-
-typedef vespalib::PtrHolder<VsmsummaryConfig> FastS_VsmsummaryHolder;
-typedef std::shared_ptr<VsmsummaryConfig> FastS_VsmsummaryHandle;
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/config/vsm.def b/streamingvisitors/src/vespa/vsm/config/vsm.def
deleted file mode 100644
index 1971f9e9574..00000000000
--- a/streamingvisitors/src/vespa/vsm/config/vsm.def
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-namespace=vespa.config.search.vsm
-
-## The document model for the documents used as input for the VSM
-doctype reference
-
-## Configuration for storage client used by VSM
-storagecfg reference
-
-## Config defining what search method should be applied to different
-## fields in the documents. It also contains a mapping from index name
-## to a set of fields making up that index.
-vsmfields reference
diff --git a/streamingvisitors/src/vespa/vsm/config/vsmfields.def b/streamingvisitors/src/vespa/vsm/config/vsmfields.def
deleted file mode 100644
index 5e943c9274d..00000000000
--- a/streamingvisitors/src/vespa/vsm/config/vsmfields.def
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-namespace=vespa.config.search.vsm
-
-## Level of verification applied to the documents received.
-documentverificationlevel int default=0
-
-## Set if one should ignore limit hits.
-searchall int default=1
-
-## The name of a field for which we are assigning a search method.
-## The field name refers directly to a field in the document model.
-fieldspec[].name string
-
-## The search method for a given field. Note: same field in 2 different document types must match on type if not a random result might be expected.
-fieldspec[].searchmethod enum { NONE, BOOL, AUTOUTF8, UTF8, SSE2UTF8, INT8, INT16, INT32, INT64, FLOAT16, FLOAT, DOUBLE, GEOPOS } default=AUTOUTF8
-fieldspec[].arg1 string default=""
-
-## Maximum number of chars to search per field.
-fieldspec[].maxlength int default=1048576
-
-## Type of the field
-fieldspec[].fieldtype enum {ATTRIBUTE, INDEX} default=INDEX
-
-## The name of a documenttype for which we are assigning a set of indexes.
-documenttype[].name string
-## The name of an index of a documenttype for which we are assigning a set of fields.
-documenttype[].index[].name string
-
-## The name of a field part of an index.
-## The field name refers directly to a field in the document model.
-documenttype[].index[].field[].name string
diff --git a/streamingvisitors/src/vespa/vsm/config/vsmsummary.def b/streamingvisitors/src/vespa/vsm/config/vsmsummary.def
deleted file mode 100644
index 5eb96624826..00000000000
--- a/streamingvisitors/src/vespa/vsm/config/vsmsummary.def
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-namespace=vespa.config.search.vsm
-
-## The name of the result class that should be generated for documents
-## returned from the VSM. If this value is empty, the first found
-## result class will be used.
-outputclass string default=""
-
-## Mapping of field names between the result class and the document
-## model. This value represents the name in the result class. Fields
-## not mentioned here will get the identity mapping.
-fieldmap[].summary string
-
-## Mapping of field names between the result class and the document
-## model. This field vector represents the names in the document model
-## that should be used as input when generating the summary field.
-fieldmap[].document[].field string
-
-## This command specifies how the document fields should be combined
-## when generating the summary field.
-fieldmap[].command enum { NONE, FLATTENJUNIPER, FLATTENSPACE } default=NONE
diff --git a/streamingvisitors/src/vespa/vsm/searcher/.gitignore b/streamingvisitors/src/vespa/vsm/searcher/.gitignore
deleted file mode 100644
index 95bc02923a9..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*.exe
-*.ilk
-*.pdb
-.depend*
-Makefile
diff --git a/streamingvisitors/src/vespa/vsm/searcher/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/searcher/CMakeLists.txt
deleted file mode 100644
index 0a2a9ec21d2..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/CMakeLists.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
- set(SSE2_FILES "fold.cpp")
-else()
- unset(SSE2_FILES)
-endif()
-
-vespa_add_library(vsm_vsmsearcher OBJECT
- SOURCES
- boolfieldsearcher.cpp
- fieldsearcher.cpp
- floatfieldsearcher.cpp
- ${SSE2_FILES}
- futf8strchrfieldsearcher.cpp
- geo_pos_field_searcher.cpp
- intfieldsearcher.cpp
- strchrfieldsearcher.cpp
- utf8flexiblestringfieldsearcher.cpp
- utf8strchrfieldsearcher.cpp
- utf8stringfieldsearcherbase.cpp
- utf8substringsearcher.cpp
- utf8substringsnippetmodifier.cpp
- utf8suffixstringfieldsearcher.cpp
- utf8exactstringfieldsearcher.cpp
- DEPENDS
- vsm_vconfig
-)
diff --git a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp
deleted file mode 100644
index 8c9b556e593..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "boolfieldsearcher.h"
-#include <vespa/document/fieldvalue/boolfieldvalue.h>
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-namespace {
-vespalib::stringref TRUE = "true";
-vespalib::stringref FALSE = "false";
-}
-
-std::unique_ptr<FieldSearcher>
-BoolFieldSearcher::duplicate() const
-{
- return std::make_unique<BoolFieldSearcher>(*this);
-}
-
-BoolFieldSearcher::BoolFieldSearcher(FieldIdT fId) :
- FieldSearcher(fId),
- _terms()
-{ }
-
-BoolFieldSearcher::~BoolFieldSearcher() = default;
-
-void BoolFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf)
-{
- _terms.clear();
- FieldSearcher::prepare(qtl, buf);
- for (const QueryTerm * qt : qtl) {
- if (TRUE == qt->getTerm()) {
- _terms.push_back(true);
- } else if (FALSE == qt->getTerm()) {
- _terms.push_back(false);
- } else {
- int64_t low;
- int64_t high;
- bool valid = qt->getAsIntegerTerm(low, high);
- _terms.push_back(valid && (low > 0));
- }
- }
-}
-
-void BoolFieldSearcher::onValue(const document::FieldValue & fv)
-{
- for(size_t j=0, jm(_terms.size()); j < jm; j++) {
- if (static_cast<const document::BoolFieldValue &>(fv).getValue() == _terms[j]) {
- addHit(*_qtl[j], 0);
- }
- }
- ++_words;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.h
deleted file mode 100644
index f6afef9e507..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "fieldsearcher.h"
-
-namespace vsm {
-
-class BoolFieldSearcher : public FieldSearcher
-{
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- BoolFieldSearcher(FieldIdT fId);
- ~BoolFieldSearcher();
- void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override;
- void onValue(const document::FieldValue & fv) override;
-private:
- std::vector<bool> _terms;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
deleted file mode 100644
index e69999b160e..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
+++ /dev/null
@@ -1,301 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "fieldsearcher.h"
-#include <vespa/vsm/vsm/fieldsearchspec.h>
-#include <vespa/document/fieldvalue/arrayfieldvalue.h>
-#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.searcher.fieldsearcher");
-
-using search::byte;
-using search::streaming::Query;
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-using search::v16qi;
-
-namespace vsm {
-
-class force
-{
- public:
- force() { FieldSearcher::init(); }
-};
-
-static force __forceInit;
-
-byte FieldSearcher::_foldLowCase[256];
-byte FieldSearcher::_wordChar[256];
-
-FieldSearcherBase::FieldSearcherBase() :
- _qtl(),
- _qtlFastBuffer(),
- _qtlFastSize(0),
- _qtlFast(nullptr)
-{
-}
-
-FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org) :
- _qtl(),
- _qtlFastBuffer(),
- _qtlFastSize(0),
- _qtlFast(nullptr)
-{
- prepare(org._qtl);
-}
-
-FieldSearcherBase::~FieldSearcherBase()
-{
-}
-
-FieldSearcherBase & FieldSearcherBase::operator = (const FieldSearcherBase & org)
-{
- if (this != &org) {
- prepare(org._qtl);
- }
- return *this;
-}
-
-void FieldSearcherBase::prepare(const QueryTermList & qtl)
-{
- _qtl = qtl;
- _qtlFastBuffer.resize(sizeof(*_qtlFast)*(_qtl.size()+1), 0x13);
- _qtlFast = reinterpret_cast<v16qi *>(reinterpret_cast<unsigned long>(&_qtlFastBuffer[0]+15) & ~0xf);
- _qtlFastSize = 0;
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- const QueryTerm & qt = **it;
- memcpy(&_qtlFast[_qtlFastSize++], qt.getTerm(), std::min(size_t(16), qt.termLen()));
- }
-}
-
-FieldSearcher::FieldSearcher(const FieldIdT & fId, bool defaultPrefix) :
- FieldSearcherBase(),
- _field(fId),
- _matchType(defaultPrefix ? PREFIX : REGULAR),
- _maxFieldLength(0x100000),
- _currentElementId(0),
- _currentElementWeight(1),
- _pureUsAsciiCount(0),
- _pureUsAsciiFieldCount(0),
- _anyUtf8Count(0),
- _anyUtf8FieldCount(0),
- _words(0),
- _badUtf8Count(0),
- _zeroCount(0)
-{
- zeroStat();
-}
-
-FieldSearcher::~FieldSearcher() = default;
-
-bool FieldSearcher::search(const StorageDocument & doc)
-{
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field());
- fInfo.setHitOffset(qt.getHitList().size());
- }
- onSearch(doc);
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field());
- fInfo.setHitCount(qt.getHitList().size() - fInfo.getHitOffset());
- fInfo.setFieldLength(_words);
- }
- _words = 0;
- return true;
-}
-
-void FieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & UNUSED_PARAM(buf))
-{
- FieldSearcherBase::prepare(qtl);
- prepareFieldId();
-}
-
-size_t FieldSearcher::countWords(const FieldRef & f)
-{
- size_t words = 0;
- const char * n = f.data();
- const char * e = n + f.size();
- for( ; n < e; ++n) {
- for (; isspace(*n) && (n<e); ++n);
- const char * m = n;
- for (; iswordchar(*n) && (n<e); ++n);
- if (n > m) {
- words++;
- }
- }
- return words;
-}
-
-void FieldSearcher::prepareFieldId()
-{
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- qt.resizeFieldId(field());
- }
-}
-
-void FieldSearcher::addStat(const FieldSearcher & toAdd)
-{
- _pureUsAsciiCount += toAdd._pureUsAsciiCount;
- _pureUsAsciiFieldCount += toAdd._pureUsAsciiFieldCount;
- _anyUtf8Count += toAdd._anyUtf8Count;
- _anyUtf8FieldCount += toAdd._anyUtf8FieldCount;
- _badUtf8Count += toAdd._badUtf8Count;
- _zeroCount += toAdd._zeroCount;
- for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] += toAdd._utf8Count[i]; }
-}
-
-void FieldSearcher::zeroStat()
-{
- _pureUsAsciiCount = 0;
- _pureUsAsciiFieldCount = 0;
- _anyUtf8Count = 0;
- _anyUtf8FieldCount = 0;
- _badUtf8Count = 0;
- _zeroCount = 0;
- for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] = 0; }
-}
-
-void FieldSearcher::init()
-{
- for (unsigned i = 0; i < NELEMS(_foldLowCase); i++) {
- _foldLowCase[i] = 0;
- _wordChar[i] = 0;
- }
- for (int i = 'A'; i <= 'Z'; i++) {
- _wordChar[i] = 0xFF;
- _foldLowCase[i] = i | 0x20;
- }
- for (int i = 'a'; i <= 'z'; i++) {
- _wordChar[i] = 0xFF;
- _foldLowCase[i] = i;
- }
- for (int i = '0'; i <= '9'; i++) {
- _wordChar[i] = 0xFF;
- _foldLowCase[i] = i;
- }
- for (int i = 0xC0; i <= 0xFF; i++) {
- _wordChar[i] = 0xFF;
- }
- _wordChar[0xd7] = 0;
- _wordChar[0xf7] = 0;
-
- if (1) /* _doAccentRemoval */ {
- _foldLowCase[0xc0] = 'a';
- _foldLowCase[0xc1] = 'a';
- _foldLowCase[0xc2] = 'a';
- _foldLowCase[0xc3] = 'a'; // A tilde
- _foldLowCase[0xc7] = 'c';
- _foldLowCase[0xc8] = 'e';
- _foldLowCase[0xc9] = 'e';
- _foldLowCase[0xca] = 'e';
- _foldLowCase[0xcb] = 'e';
- _foldLowCase[0xcc] = 'i'; // I grave
- _foldLowCase[0xcd] = 'i';
- _foldLowCase[0xce] = 'i';
- _foldLowCase[0xcf] = 'i';
- _foldLowCase[0xd3] = 'o';
- _foldLowCase[0xd4] = 'o';
- _foldLowCase[0xda] = 'u';
- _foldLowCase[0xdb] = 'u';
-
- _foldLowCase[0xe0] = 'a';
- _foldLowCase[0xe1] = 'a';
- _foldLowCase[0xe2] = 'a';
- _foldLowCase[0xe3] = 'a'; // a tilde
- _foldLowCase[0xe7] = 'c';
- _foldLowCase[0xe8] = 'e';
- _foldLowCase[0xe9] = 'e';
- _foldLowCase[0xea] = 'e';
- _foldLowCase[0xeb] = 'e';
- _foldLowCase[0xec] = 'i'; // i grave
- _foldLowCase[0xed] = 'i';
- _foldLowCase[0xee] = 'i';
- _foldLowCase[0xef] = 'i';
- _foldLowCase[0xf3] = 'o';
- _foldLowCase[0xf4] = 'o';
- _foldLowCase[0xfa] = 'u';
- _foldLowCase[0xfb] = 'u';
- }
-}
-
-void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT & difm, const SharedSearcherBuf & searcherBuf, Query & query)
-{
- QueryTermList qtl;
- query.getLeafs(qtl);
- vespalib::string tmp;
- for (FieldIdTSearcherMap::iterator it = begin(), mt = end(); it != mt; it++) {
- QueryTermList onlyInIndex;
- FieldIdT fid = (*it)->field();
- for (QueryTermList::iterator qt = qtl.begin(), mqt = qtl.end(); qt != mqt; qt++) {
- QueryTerm * q = *qt;
- for (DocumentTypeIndexFieldMapT::const_iterator dt(difm.begin()), dmt(difm.end()); dt != dmt; dt++) {
- const IndexFieldMapT & fim = dt->second;
- IndexFieldMapT::const_iterator found = fim.find(FieldSearchSpecMap::stripNonFields(q->index()));
- if (found != fim.end()) {
- const FieldIdTList & index = found->second;
- if ((find(index.begin(), index.end(), fid) != index.end()) && (find(onlyInIndex.begin(), onlyInIndex.end(), q) == onlyInIndex.end())) {
- onlyInIndex.push_back(q);
- }
- } else {
- LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", q->index().c_str());
- }
- }
- }
- /// Should perhaps do a unique on onlyInIndex
- (*it)->prepare(onlyInIndex, searcherBuf);
- if (logger.wants(ns_log::Logger::spam)) {
- char tmpBuf[16];
- sprintf(tmpBuf,"%d", fid);
- tmp += tmpBuf;
- tmp += ", ";
- }
- }
- LOG(debug, "Will search in %s", tmp.c_str());
-}
-
-bool FieldSearcher::onSearch(const StorageDocument & doc)
-{
- bool retval(true);
- size_t fNo(field());
- const StorageDocument::SubDocument & sub = doc.getComplexField(fNo);
- if (sub.getFieldValue() != nullptr) {
- IteratorHandler ih(*this);
- sub.getFieldValue()->iterateNested(sub.getRange(), ih);
- }
- return retval;
-}
-
-void
-FieldSearcher::IteratorHandler::onPrimitive(uint32_t, const Content & c)
-{
- LOG(spam, "onPrimitive: field value '%s'", c.getValue().toString().c_str());
- _searcher.setCurrentWeight(c.getWeight());
- _searcher.setCurrentElementId(getArrayIndex());
- _searcher.onValue(c.getValue());
-}
-
-void
-FieldSearcher::IteratorHandler::onCollectionStart(const Content & c)
-{
- const document::FieldValue & fv = c.getValue();
- LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str());
- if (fv.isA(document::FieldValue::Type::ARRAY)) {
- const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(fv);
- LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size());
- } else if (fv.isA(document::FieldValue::Type::WSET)) {
- const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv);
- LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size());
- }
-}
-
-void
-FieldSearcher::IteratorHandler::onStructStart(const Content & c)
-{
- LOG(spam, "onStructStart: field value '%s'", c.getValue().toString().c_str());
- _searcher.onStructValue(static_cast<const document::StructFieldValue &>(c.getValue()));
-}
-
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
deleted file mode 100644
index 5c2ef8fec28..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/document/fieldvalue/iteratorhandler.h>
-#include <vespa/searchlib/query/streaming/query.h>
-#include <vespa/vsm/common/document.h>
-#include <vespa/vsm/common/storagedocument.h>
-
-namespace vsm {
-
-typedef size_t termcount_t;
-typedef size_t termsize_t;
-
-#if defined(COLLECT_CHAR_STAT)
- #define NEED_CHAR_STAT(a) { a; }
-#else
- #define NEED_CHAR_STAT(a)
-#endif
-
-typedef ucs4_t cmptype_t;
-typedef vespalib::Array<cmptype_t> SearcherBuf;
-typedef std::shared_ptr<SearcherBuf> SharedSearcherBuf;
-typedef std::vector<char> CharVector;
-
-class FieldSearcherBase
-{
-protected:
- search::streaming::QueryTermList _qtl;
-private:
- CharVector _qtlFastBuffer;
-protected:
- FieldSearcherBase();
- FieldSearcherBase(const FieldSearcherBase & org);
- virtual ~FieldSearcherBase(void);
- FieldSearcherBase & operator = (const FieldSearcherBase & org);
- void prepare(const search::streaming::QueryTermList & qtl);
- size_t _qtlFastSize;
- search::v16qi *_qtlFast;
-};
-
-class FieldSearcher : public FieldSearcherBase
-{
-public:
- enum MatchType {
- REGULAR,
- PREFIX,
- SUBSTRING,
- SUFFIX,
- EXACT
- };
-
- FieldSearcher(const FieldIdT & fId, bool defaultPrefix=false);
- ~FieldSearcher() override;
- virtual std::unique_ptr<FieldSearcher> duplicate() const = 0;
- bool search(const StorageDocument & doc);
- virtual void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf);
- const FieldIdT & field() const { return _field; }
- void field(const FieldIdT & v) { _field = v; prepareFieldId(); }
- bool prefix() const { return _matchType == PREFIX; }
- bool substring() const { return _matchType == SUBSTRING; }
- bool suffix() const { return _matchType == SUFFIX; }
- bool exact() const { return _matchType == EXACT; }
- void setMatchType(MatchType mt) { _matchType = mt; }
- static void init();
- static search::byte fold(search::byte c) { return _foldLowCase[c]; }
- static search::byte iswordchar(search::byte c) { return _wordChar[c]; }
- static search::byte isspace(search::byte c) { return ! iswordchar(c); }
- static size_t countWords(const FieldRef & f);
- unsigned pureUsAsciiCount() const { return _pureUsAsciiCount; }
- unsigned pureUsAsciiFieldCount() const { return _pureUsAsciiFieldCount; }
- unsigned anyUtf8Count() const { return _anyUtf8Count; }
- unsigned anyUtf8FieldCount() const { return _anyUtf8FieldCount; }
- unsigned badUtf8Count() const { return _badUtf8Count; }
- unsigned zeroCount() const { return _zeroCount; }
- unsigned utf8Count(size_t sz) const { return _utf8Count[1+sz]; }
- const unsigned * utf8Count() const { return _utf8Count; }
- int32_t getCurrentWeight() const { return _currentElementWeight; }
- void addStat(const FieldSearcher & toAdd);
- void zeroStat();
- FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; }
- size_t maxFieldLength() const { return _maxFieldLength; }
-
-private:
- class IteratorHandler : public document::fieldvalue::IteratorHandler {
- private:
- FieldSearcher & _searcher;
-
- void onPrimitive(uint32_t fid, const Content & c) override;
- void onCollectionStart(const Content & c) override;
- void onStructStart(const Content & c) override;
-
- public:
- IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {}
- };
- friend class IteratorHandler; // to allow calls to onValue();
-
- void prepareFieldId();
- void setCurrentWeight(int32_t weight) { _currentElementWeight = weight; }
- void setCurrentElementId(int32_t weight) { _currentElementId = weight; }
- bool onSearch(const StorageDocument & doc);
- virtual void onValue(const document::FieldValue & fv) = 0;
- virtual void onStructValue(const document::StructFieldValue &) { }
- FieldIdT _field;
- MatchType _matchType;
- unsigned _maxFieldLength;
- uint32_t _currentElementId;
- int32_t _currentElementWeight; // Contains the weight of the current item being evaluated.
- /// Number of bytes in blocks containing pure us-ascii
- unsigned _pureUsAsciiCount;
- /// Number of blocks containing pure us-ascii
- unsigned _pureUsAsciiFieldCount;
- /// Number of bytes in blocks containing any non us-ascii
- unsigned _anyUtf8Count;
- /// Number of blocks containing any non us-ascii
- unsigned _anyUtf8FieldCount;
-protected:
- /// Number of terms searched.
- unsigned _words;
- /// Number of utf8 bytes by utf8 size.
- unsigned _utf8Count[6];
- unsigned _badUtf8Count;
- unsigned _zeroCount;
-protected:
- void addPureUsAsciiField(size_t sz) { _pureUsAsciiCount += sz; _pureUsAsciiFieldCount++;; }
- void addAnyUtf8Field(size_t sz) { _anyUtf8Count += sz; _anyUtf8FieldCount++; }
- /**
- * Adds a hit to the given query term.
- * For each call to onValue() a batch of words are processed, and the position is local to this batch.
- **/
- void addHit(search::streaming::QueryTerm & qt, uint32_t pos) const {
- qt.add(_words + pos, field(), _currentElementId, getCurrentWeight());
- }
-public:
- static search::byte _foldLowCase[256];
- static search::byte _wordChar[256];
-};
-
-typedef std::unique_ptr<FieldSearcher> FieldSearcherContainer;
-typedef std::vector<FieldSearcherContainer> FieldIdTSearcherMapT;
-
-class FieldIdTSearcherMap : public FieldIdTSearcherMapT
-{
-public:
- void prepare(const DocumentTypeIndexFieldMapT & difm, const SharedSearcherBuf & searcherBuf, search::streaming::Query & query);
-};
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp
deleted file mode 100644
index 02d8bd8c12a..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "floatfieldsearcher.h"
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-FloatFieldSearcher::duplicate() const
-{
- return std::make_unique<FloatFieldSearcher>(*this);
-}
-
-std::unique_ptr<FieldSearcher>
-DoubleFieldSearcher::duplicate() const
-{
- return std::make_unique<DoubleFieldSearcher>(*this);
-}
-
-template<typename T>
-FloatFieldSearcherT<T>::FloatFieldSearcherT(FieldIdT fId) :
- FieldSearcher(fId),
- _floatTerm()
-{}
-
-template<typename T>
-FloatFieldSearcherT<T>::~FloatFieldSearcherT() {}
-
-template<typename T>
-void FloatFieldSearcherT<T>::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf)
-{
- _floatTerm.clear();
- FieldSearcher::prepare(qtl, buf);
- for (QueryTermList::const_iterator it=qtl.begin(); it < qtl.end(); it++) {
- const QueryTerm * qt = *it;
- size_t sz(qt->termLen());
- if (sz) {
- double low;
- double high;
- bool valid = qt->getAsDoubleTerm(low, high);
- _floatTerm.push_back(FloatInfo(low, high, valid));
- }
- }
-}
-
-
-template<typename T>
-void FloatFieldSearcherT<T>::onValue(const document::FieldValue & fv)
-{
- for(size_t j=0, jm(_floatTerm.size()); j < jm; j++) {
- const FloatInfo & ii = _floatTerm[j];
- if (ii.valid() && (ii.cmp(fv.getAsDouble()))) {
- addHit(*_qtl[j], 0);
- }
- }
- ++_words;
-}
-
-template<typename T>
-bool FloatFieldSearcherT<T>::FloatInfo::cmp(T key) const
-{
- return (_lower <= key) && (key <= _upper);
-}
-
-template class FloatFieldSearcherT<float>;
-template class FloatFieldSearcherT<double>;
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.h
deleted file mode 100644
index 98018fbf4a3..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.h
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "fieldsearcher.h"
-
-namespace vsm {
-
-template <typename T>
-class FloatFieldSearcherT : public FieldSearcher
-{
-public:
- FloatFieldSearcherT(FieldIdT fId=0);
- ~FloatFieldSearcherT();
- void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override;
- void onValue(const document::FieldValue & fv) override;
-protected:
- class FloatInfo
- {
- public:
- FloatInfo(T low, T high, bool v) : _lower(low), _upper(high), _valid(v) { if (low > high) { _lower = high; _upper = low; } }
- bool cmp(T key) const;
- bool valid() const { return _valid; }
- void setValid(bool v) { _valid = v; }
- T getLow() const { return _lower; }
- T getHigh() const { return _upper; }
- private:
- T _lower;
- T _upper;
- bool _valid;
- };
- typedef std::vector<FloatInfo> FloatInfoListT;
- FloatInfoListT _floatTerm;
-};
-
-typedef FloatFieldSearcherT<float> FloatFieldSearcherTF;
-typedef FloatFieldSearcherT<double> FloatFieldSearcherTD;
-
-class FloatFieldSearcher : public FloatFieldSearcherTF
-{
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- FloatFieldSearcher(FieldIdT fId=0) : FloatFieldSearcherTF(fId) { }
-};
-
-class DoubleFieldSearcher : public FloatFieldSearcherTD
-{
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- DoubleFieldSearcher(FieldIdT fId=0) : FloatFieldSearcherTD(fId) { }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fold.cpp b/streamingvisitors/src/vespa/vsm/searcher/fold.cpp
deleted file mode 100644
index bd2392d3ad6..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/fold.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-//
-#include "fold.h"
-
-namespace vsm {
-
-const unsigned char * sse2_foldaa(const unsigned char * toFoldOrg, size_t sz, unsigned char * foldedOrg)
-{
- typedef char v16qi __attribute__ ((__vector_size__(16)));
- typedef long long v2di __attribute__ ((__vector_size__(16)));
- static v16qi _G_0 = { '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1 };
- static v16qi _G_9 = { '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9' };
- static v16qi _G_a = { 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1 };
- static v16qi _G_z = { 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z' };
- static v16qi _G_8bit = { (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4,
- (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4 };
- static v2di _G_lowCase = { 0x2020202020202020ULL, 0x2020202020202020ULL };
- const v16qi *toFold = reinterpret_cast<const v16qi *>(toFoldOrg);
- v2di * folded = reinterpret_cast<v2di *>(foldedOrg);
- size_t i=0;
- for (size_t m=sz/16; i < m; i++)
- {
-#ifndef __INTEL_COMPILER
- int nonAscii = __builtin_ia32_pmovmskb128(toFold[i]);
- if (nonAscii)
- {
-#ifdef __clang__
- v16qi non8Mask = _G_8bit > toFold[i];
-#else
- v16qi non8Mask = __builtin_ia32_pcmpgtb128(_G_8bit, toFold[i]);
-#endif
- int non8bit = __builtin_ia32_pmovmskb128(non8Mask);
- if (non8bit)
- {
- break;
- }
- break;
- }
-#ifdef __clang__
- v16qi _0 = toFold[i] > _G_0;
- v16qi _z = toFold[i] > _G_z;
- v2di _0_z = v2di(_0) ^ v2di(_z);
- v2di toLow = _0_z & v2di(toFold[i]);
- v16qi low = v16qi(toLow | _G_lowCase);
- _0 = low > _G_0;
- v16qi _9 = low > _G_9;
- v16qi _a = low > _G_a;
- _z = low > _G_z;
- v2di _0_9_m = v2di(_0) ^ v2di(_9);
- v2di _a_z_m = v2di(_a) ^ v2di(_z);
- v2di _0_9 = _0_9_m & v2di(low);
- v2di _a_z = _a_z_m & v2di(low);
- folded[i] = _0_9 | _a_z;
-#else
- v16qi _0 = __builtin_ia32_pcmpgtb128(toFold[i], _G_0);
- v16qi _z = __builtin_ia32_pcmpgtb128(toFold[i], _G_z);
- v2di _0_z = __builtin_ia32_pxor128(v2di(_0), v2di(_z));
- v2di toLow = __builtin_ia32_pand128(_0_z, v2di(toFold[i]));
- v16qi low = v16qi(__builtin_ia32_por128(toLow, _G_lowCase));
- _0 = __builtin_ia32_pcmpgtb128(low, _G_0);
- v16qi _9 = __builtin_ia32_pcmpgtb128(low, _G_9);
- v16qi _a = __builtin_ia32_pcmpgtb128(low, _G_a);
- _z = __builtin_ia32_pcmpgtb128(low, _G_z);
- v2di _0_9_m = __builtin_ia32_pxor128(v2di(_0), v2di(_9));
- v2di _a_z_m = __builtin_ia32_pxor128(v2di(_a), v2di(_z));
- v2di _0_9 = __builtin_ia32_pand128(_0_9_m, v2di(low));
- v2di _a_z = __builtin_ia32_pand128(_a_z_m, v2di(low));
- folded[i] = __builtin_ia32_por128(_0_9, _a_z);
-#endif
-#else
-# warning "Intel's icc compiler does not like __builtin_ia32_pxor128"
- LOG_ABORT("should not be reached");
-#endif
- }
- return toFoldOrg+i*16;
-}
-
-const unsigned char * sse2_foldua(const unsigned char * toFoldOrg, size_t sz, unsigned char * foldedOrg)
-{
- typedef char v16qi __attribute__ ((__vector_size__(16)));
- typedef long long v2di __attribute__ ((__vector_size__(16)));
- static v16qi _G_0 = { '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1 };
- static v16qi _G_9 = { '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9' };
- static v16qi _G_a = { 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1 };
- static v16qi _G_z = { 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z' };
- static v16qi _G_8bit = { (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4,
- (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4 };
- static v2di _G_lowCase = { 0x2020202020202020ULL, 0x2020202020202020ULL };
- v2di * folded = reinterpret_cast<v2di *>(foldedOrg);
- size_t i=0;
- for (size_t m=sz/16; i < m; i++)
- {
-#ifndef __INTEL_COMPILER
-#ifdef __clang__
- v16qi current = __builtin_ia32_lddqu(reinterpret_cast<const char *>(&toFoldOrg[i*16]));
-#else
- v16qi current = __builtin_ia32_loaddqu(reinterpret_cast<const char *>(&toFoldOrg[i*16]));
-#endif
- int nonAscii = __builtin_ia32_pmovmskb128(current);
- if (nonAscii)
- {
-#ifdef __clang__
- v16qi non8Mask = _G_8bit > current;
-#else
- v16qi non8Mask = __builtin_ia32_pcmpgtb128(_G_8bit, current);
-#endif
- int non8bit = __builtin_ia32_pmovmskb128(non8Mask);
- if (non8bit)
- {
- break;
- }
- break;
- }
-#ifdef __clang__
- v16qi _0 = current > _G_0;
- v16qi _z = current > _G_z;
- v2di _0_z = v2di(_0) ^ v2di(_z);
- v2di toLow = _0_z & v2di(current);
- v16qi low = v16qi(toLow | _G_lowCase);
- _0 = low > _G_0;
- v16qi _9 = low > _G_9;
- v16qi _a = low > _G_a;
- _z = low > _G_z;
- v2di _0_9_m = v2di(_0) ^ v2di(_9);
- v2di _a_z_m = v2di(_a) ^ v2di(_z);
- v2di _0_9 = _0_9_m & v2di(low);
- v2di _a_z = _a_z_m & v2di(low);
- folded[i] = _0_9 | _a_z;
-#else
- v16qi _0 = __builtin_ia32_pcmpgtb128(current, _G_0);
- v16qi _z = __builtin_ia32_pcmpgtb128(current, _G_z);
- v2di _0_z = __builtin_ia32_pxor128(v2di(_0), v2di(_z));
- v2di toLow = __builtin_ia32_pand128(_0_z, v2di(current));
- v16qi low = v16qi(__builtin_ia32_por128(toLow, _G_lowCase));
- _0 = __builtin_ia32_pcmpgtb128(low, _G_0);
- v16qi _9 = __builtin_ia32_pcmpgtb128(low, _G_9);
- v16qi _a = __builtin_ia32_pcmpgtb128(low, _G_a);
- _z = __builtin_ia32_pcmpgtb128(low, _G_z);
- v2di _0_9_m = __builtin_ia32_pxor128(v2di(_0), v2di(_9));
- v2di _a_z_m = __builtin_ia32_pxor128(v2di(_a), v2di(_z));
- v2di _0_9 = __builtin_ia32_pand128(_0_9_m, v2di(low));
- v2di _a_z = __builtin_ia32_pand128(_a_z_m, v2di(low));
- folded[i] = __builtin_ia32_por128(_0_9, _a_z);
-#endif
-#else
-# warning "Intel's icc compiler does not like __builtin_ia32_pxor128"
- LOG_ABORT("should not be reached");
-#endif
- }
- return toFoldOrg+i*16;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fold.h b/streamingvisitors/src/vespa/vsm/searcher/fold.h
deleted file mode 100644
index 578b883484f..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/fold.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/common/document.h>
-
-namespace vsm {
-
-const search::byte * sse2_foldaa(const search::byte * toFoldOrg, size_t sz, search::byte * foldedOrg);
-const search::byte * sse2_foldua(const search::byte * toFoldOrg, size_t sz, search::byte * foldedOrg);
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp
deleted file mode 100644
index fc5d77de419..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "futf8strchrfieldsearcher.h"
-#ifdef __x86_64__
-#include "fold.h"
-#endif
-#include <vespa/vespalib/util/size_literals.h>
-
-using search::byte;
-using search::streaming::QueryTerm;
-using search::v16qi;
-using vespalib::Optimized;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-FUTF8StrChrFieldSearcher::duplicate() const
-{
- return std::make_unique<FUTF8StrChrFieldSearcher>(*this);
-}
-
-FUTF8StrChrFieldSearcher::FUTF8StrChrFieldSearcher()
- : UTF8StrChrFieldSearcher(),
- _folded(4_Ki)
-{ }
-FUTF8StrChrFieldSearcher::FUTF8StrChrFieldSearcher(FieldIdT fId)
- : UTF8StrChrFieldSearcher(fId),
- _folded(4_Ki)
-{ }
-FUTF8StrChrFieldSearcher::~FUTF8StrChrFieldSearcher() {}
-
-bool
-FUTF8StrChrFieldSearcher::ansiFold(const char * toFold, size_t sz, char * folded)
-{
- bool retval(true);
- for(size_t i=0; i < sz; i++) {
- byte c = toFold[i];
- if (c>=128) { retval = false; break; }
- folded[i] = FieldSearcher::_foldLowCase[c];
- }
- return retval;
-}
-
-bool
-FUTF8StrChrFieldSearcher::lfoldaa(const char * toFold, size_t sz, char * folded, size_t & unalignedStart)
-{
- unalignedStart = (size_t(toFold) & 0xF);
-#ifdef __x86_64__
- bool retval(true);
- size_t unalignedsz = std::min(sz, (16 - unalignedStart) & 0xF);
-
- size_t foldedUnaligned = (size_t(folded) & 0xF);
- unalignedStart = (foldedUnaligned < unalignedStart) ? (unalignedStart-foldedUnaligned) : unalignedStart + 16 - foldedUnaligned;
- size_t alignedStart = unalignedStart+unalignedsz;
-
- size_t alignedsz = sz - unalignedsz;
- size_t alignsz16 = alignedsz & 0xFFFFFFF0;
- size_t rest = alignedsz - alignsz16;
-
- if (unalignedStart) {
- retval = ansiFold(toFold, unalignedsz, folded + unalignedStart);
- }
- if (alignsz16 && retval) {
- const byte * end = sse2_foldaa(reinterpret_cast<const byte *>(toFold+unalignedsz), alignsz16, reinterpret_cast<byte *>(folded+alignedStart));
- retval = (end == reinterpret_cast<const byte *>(toFold+unalignedsz+alignsz16));
- }
- if(rest && retval) {
- retval = ansiFold(toFold + unalignedsz + alignsz16, rest, folded+alignedStart+alignsz16);
- }
- return retval;
-#else
- return ansiFold(toFold, sz, folded + unalignedStart);
-#endif
-}
-
-bool
-FUTF8StrChrFieldSearcher::lfoldua(const char * toFold, size_t sz, char * folded, size_t & alignedStart)
-{
- alignedStart = 0xF - (size_t(folded + 0xF) % 0x10);
-#ifdef __x86_64__
- bool retval(true);
-
- size_t alignsz16 = sz & 0xFFFFFFF0;
- size_t rest = sz - alignsz16;
-
- if (alignsz16) {
- const byte * end = sse2_foldua(reinterpret_cast<const byte *>(toFold), alignsz16, reinterpret_cast<byte *>(folded+alignedStart));
- retval = (end == reinterpret_cast<const byte *>(toFold+alignsz16));
- }
- if(rest && retval) {
- retval = ansiFold(toFold + alignsz16, rest, folded+alignedStart+alignsz16);
- }
- return retval;
-#else
- return ansiFold(toFold, sz, folded + alignedStart);
-#endif
-}
-
-namespace {
-
-#ifdef __x86_64__
-inline const char * advance(const char * n, const v16qi zero)
-{
- uint32_t charMap = 0;
- unsigned zeroCountSum = 0;
- do { // find first '\0' character (the end of the word)
-#ifndef __INTEL_COMPILER
-#ifdef __clang__
- v16qi tmpCurrent = __builtin_ia32_lddqu(n+zeroCountSum);
- v16qi tmp0 = tmpCurrent == zero;
-#else
- v16qi tmpCurrent = __builtin_ia32_loaddqu(n+zeroCountSum);
- v16qi tmp0 = __builtin_ia32_pcmpeqb128(tmpCurrent, reinterpret_cast<v16qi>(zero));
-#endif
- charMap = __builtin_ia32_pmovmskb128(tmp0); // 1 in charMap equals to '\0' in input buffer
-#else
-# warning "Intel's icc compiler does not like __builtin_ia32_xxxxx"
- LOG_ABORT("should not be reached");
-#endif
- zeroCountSum += 16;
- } while (!charMap);
- int charCount = Optimized::lsbIdx(charMap); // number of word characters in last 16 bytes
- uint32_t zeroMap = ((~charMap) & 0xffff) >> charCount;
-
- int zeroCounter = Optimized::lsbIdx(zeroMap); // number of non-characters ('\0') in last 16 bytes
- int sum = zeroCountSum - 16 + charCount + zeroCounter;
- if (!zeroMap) { // only '\0' in last 16 bytes (no new word found)
- do { // find first word character (the next word)
-#ifndef __INTEL_COMPILER
-#ifdef __clang__
- v16qi tmpCurrent = __builtin_ia32_lddqu(n+zeroCountSum);
- tmpCurrent = tmpCurrent > zero;
-#else
- v16qi tmpCurrent = __builtin_ia32_loaddqu(n+zeroCountSum);
- tmpCurrent = __builtin_ia32_pcmpgtb128(tmpCurrent, reinterpret_cast<v16qi>(zero));
-#endif
- zeroMap = __builtin_ia32_pmovmskb128(tmpCurrent); // 1 in zeroMap equals to word character in input buffer
-#else
-# warning "Intel's icc compiler does not like __builtin_ia32_xxxxx"
- LOG_ABORT("should not be reached");
-#endif
- zeroCountSum += 16;
- } while(!zeroMap);
- zeroCounter = Optimized::lsbIdx(zeroMap);
- sum = zeroCountSum - 16 + zeroCounter;
- }
- return n + sum;
-}
-#else
-inline const char* advance(const char* n)
-{
- const char* p = n;
- const char* zero = static_cast<const char *>(memchr(p, 0, 64_Ki));
- while (zero == nullptr) {
- p += 64_Ki;
- zero = static_cast<const char *>(memchr(p, 0, 64_Ki));
- }
- p = zero;
- while (*p == '\0') {
- ++p;
- }
- return p;
-}
-#endif
-
-}
-
-size_t FUTF8StrChrFieldSearcher::match(const char *folded, size_t sz, QueryTerm & qt)
-{
-#ifdef __x86_64__
- const v16qi _G_zero = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-#endif
- termcount_t words(0);
- const char * term;
- termsize_t tsz = qt.term(term);
- const char *et=term+tsz;
- const char * n = folded;
- const char *e = n + sz;
-
- while (!*n) n++;
- while (true) {
- if (n>=e) break;
-
-#if 0
- v16qi current = __builtin_ia32_loaddqu(n);
- current = __builtin_ia32_pcmpeqb128(current, _qtlFast[0]);
- unsigned eqMap = __builtin_ia32_pmovmskb128(current);
- unsigned neqMap = ~eqMap;
- unsigned numEq = Optimized::lsbIdx(neqMap);
- /* if (eqMap)*/ {
- if (numEq >= 16) {
- const char *tt = term+16;
- const char *p = n+16;
- while ( (*tt == *p) && (tt < et)) { tt++; p++; numEq++; }
- }
- if ((numEq >= tsz) && (prefix() || qt.isPrefix() || !n[tsz])) {
- addHit(qt, words);
- }
- }
-#else
- const char *tt = term;
- while ((tt < et) && (*tt == *n)) { tt++; n++; }
- if ((tt == et) && (prefix() || qt.isPrefix() || !*n)) {
- addHit(qt, words);
- }
-#endif
- words++;
-#ifdef __x86_64__
- n = advance(n, _G_zero);
-#else
- n = advance(n);
-#endif
- }
- return words;
-}
-
-size_t FUTF8StrChrFieldSearcher::match(const char *folded, size_t sz, size_t mintsz, QueryTerm ** qtl, size_t qtlSize)
-{
- (void) mintsz;
-#ifdef __x86_64__
- const v16qi _G_zero = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-#endif
- termcount_t words(0);
- const char * n = folded;
- const char *e = n + sz;
- while (!*n) n++;
- for( ; ; ) {
- if (n>=e) break;
-#if 0
- v16qi current = __builtin_ia32_loaddqu(n);
- for(size_t i=0; i < qtlSize; i++) {
- v16qi tmpEq = __builtin_ia32_pcmpeqb128(current, _qtlFast[i]);
- unsigned eqMap = __builtin_ia32_pmovmskb128(tmpEq);
- /* if (eqMap) */ {
- QueryTerm & qt = *qtl[i];
- unsigned neqMap = ~eqMap;
- unsigned numEq = Optimized::lsbIdx(neqMap);
- termsize_t tsz = qt.termLen();
- if (numEq >= 16) {
- const char *tt = qt.term() + 16;
- const char *et=tt+tsz;
- const char *p = n+16;
- while ( (*tt == *p) && (tt < et)) { tt++; p++; numEq++; }
- }
- if ((numEq >= tsz) && (prefix() || qt.isPrefix() || !n[tsz])) {
- addHit(qt, words);
- }
- }
- }
-#else
- for(QueryTerm ** it=qtl, ** mt=qtl+qtlSize; it != mt; it++) {
- QueryTerm & qt = **it;
- const char * term;
- termsize_t tsz = qt.term(term);
-
- const char *et=term+tsz;
- const char *fnt;
- for (fnt = n; (term < et) && (*term == *fnt); term++, fnt++);
- if ((term == et) && (prefix() || qt.isPrefix() || !*fnt)) {
- addHit(qt, words);
- }
- }
-#endif
- words++;
-#ifdef __x86_64__
- n = advance(n, _G_zero);
-#else
- n = advance(n);
-#endif
- }
- return words;
-}
-
-size_t FUTF8StrChrFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt)
-{
- _folded.reserve(f.size()+16*3); //Enable fulle xmm0 store
- size_t unalignedStart(0);
- bool ascii7Bit = lfoldua(f.data(), f.size(), &_folded[0], unalignedStart);
- if (ascii7Bit) {
- char * folded = &_folded[unalignedStart];
- /// Add the pattern 00 01 00 to avoid multiple eof tests of falling off the edge.
- folded[f.size()] = 0;
- folded[f.size()+1] = 0x01;
- memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values
- return match(folded, f.size(), qt);
- NEED_CHAR_STAT(addPureUsAsciiField(f.size()));
- } else {
- return UTF8StrChrFieldSearcher::matchTerm(f, qt);
- }
-}
-
-size_t FUTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
-{
- _folded.reserve(f.size()+16*3); //Enable fulle xmm0 store
- size_t unalignedStart(0);
- bool ascii7Bit = lfoldua(f.data(), f.size(), &_folded[0], unalignedStart);
- if (ascii7Bit) {
- char * folded = &_folded[unalignedStart];
- /// Add the pattern 00 01 00 to avoid multiple eof tests of falling off the edge.
- folded[f.size()] = 0;
- folded[f.size()+1] = 0x01;
- memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values
- return match(folded, f.size(), mintsz, &_qtl[0], _qtl.size());
- NEED_CHAR_STAT(addPureUsAsciiField(f.size()));
- } else {
- return UTF8StrChrFieldSearcher::matchTerms(f, mintsz);
- }
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.h
deleted file mode 100644
index 900ab4c9120..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "utf8strchrfieldsearcher.h"
-
-namespace vsm {
-
-class FUTF8StrChrFieldSearcher : public UTF8StrChrFieldSearcher
-{
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- FUTF8StrChrFieldSearcher();
- FUTF8StrChrFieldSearcher(FieldIdT fId);
- ~FUTF8StrChrFieldSearcher();
- static bool ansiFold(const char * toFold, size_t sz, char * folded);
- static bool lfoldaa(const char * toFold, size_t sz, char * folded, size_t & unalignedStart);
- static bool lfoldua(const char * toFold, size_t sz, char * folded, size_t & alignedStart);
- private:
- size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
- size_t matchTerms(const FieldRef&, const size_t shortestTerm) override;
- virtual size_t match(const char *folded, size_t sz, search::streaming::QueryTerm & qt);
- size_t match(const char *folded, size_t sz, size_t mintsz, search::streaming::QueryTerm ** qtl, size_t qtlSize);
- std::vector<char> _folded;
-};
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp
deleted file mode 100644
index db93bda7778..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "geo_pos_field_searcher.h"
-#include <vespa/document/fieldvalue/arrayfieldvalue.h>
-#include <vespa/document/fieldvalue/structfieldvalue.h>
-#include <vespa/searchlib/common/geo_location_parser.h>
-#include <vespa/vespalib/util/issue.h>
-#include <vespa/vespalib/util/exception.h>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.searcher.geo_pos_field_searcher");
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-using search::common::GeoLocation;
-using search::common::GeoLocationParser;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher> GeoPosFieldSearcher::duplicate() const {
- return std::make_unique<GeoPosFieldSearcher>(*this);
-}
-
-GeoPosFieldSearcher::GeoPosFieldSearcher(FieldIdT fId) :
- FieldSearcher(fId),
- _geoPosTerm()
-{}
-
-GeoPosFieldSearcher::~GeoPosFieldSearcher() {}
-
-void GeoPosFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf) {
- _geoPosTerm.clear();
- FieldSearcher::prepare(qtl, buf);
- for (const QueryTerm * qt : qtl) {
- const vespalib::string & str = qt->getTermString();
- GeoLocationParser parser;
- bool valid = parser.parseNoField(str);
- if (! valid) {
- vespalib::Issue::report("invalid position in term: %s", str.c_str());
- }
- _geoPosTerm.emplace_back(parser.getGeoLocation());
- }
-}
-
-void GeoPosFieldSearcher::onValue(const document::FieldValue & fv) {
- LOG(spam, "ignore field value '%s'", fv.toString().c_str());
-}
-
-void GeoPosFieldSearcher::onStructValue(const document::StructFieldValue & fv) {
- size_t num_terms = _geoPosTerm.size();
- for (size_t j = 0; j < num_terms; ++j) {
- const GeoPosInfo & gpi = _geoPosTerm[j];
- if (gpi.valid() && gpi.cmp(fv)) {
- addHit(*_qtl[j], 0);
- }
- }
- ++_words;
-}
-
-bool GeoPosFieldSearcher::GeoPosInfo::cmp(const document::StructFieldValue & sfv) const {
- try {
- auto xv = sfv.getValue("x");
- auto yv = sfv.getValue("y");
- if (xv && yv) {
- int32_t x = xv->getAsInt();
- int32_t y = yv->getAsInt();
- GeoLocation::Point p{x,y};
- if (inside_limit(p)) {
- return true;
- }
- }
- } catch (const vespalib::Exception &e) {
- vespalib::Issue::report("bad fieldvalue for GeoPosFieldSearcher: %s", e.getMessage().c_str());
- }
- return false;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.h b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.h
deleted file mode 100644
index ef1c5b5a1c4..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "fieldsearcher.h"
-#include <vespa/searchlib/common/geo_location.h>
-
-namespace vsm {
-
-class GeoPosFieldSearcher : public FieldSearcher {
-public:
- GeoPosFieldSearcher(FieldIdT fId=0);
- ~GeoPosFieldSearcher();
- void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override;
- void onValue(const document::FieldValue & fv) override;
- void onStructValue(const document::StructFieldValue & fv) override;
- std::unique_ptr<FieldSearcher> duplicate() const override;
-protected:
- using GeoLocation = search::common::GeoLocation;
- class GeoPosInfo : public GeoLocation {
- public:
- GeoPosInfo (GeoLocation loc) noexcept : GeoLocation(std::move(loc)) {}
- bool cmp(const document::StructFieldValue & fv) const;
- };
- typedef std::vector<GeoPosInfo> GeoPosInfoListT;
- GeoPosInfoListT _geoPosTerm;
-};
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp
deleted file mode 100644
index 8cfb8e6df14..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "intfieldsearcher.h"
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-IntFieldSearcher::duplicate() const
-{
- return std::make_unique<IntFieldSearcher>(*this);
-}
-
-IntFieldSearcher::IntFieldSearcher(FieldIdT fId) :
- FieldSearcher(fId),
- _intTerm()
-{ }
-
-IntFieldSearcher::~IntFieldSearcher() = default;
-
-void IntFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf)
-{
- _intTerm.clear();
- FieldSearcher::prepare(qtl, buf);
- for (QueryTermList::const_iterator it=qtl.begin(); it < qtl.end(); it++) {
- const QueryTerm * qt = *it;
- size_t sz(qt->termLen());
- if (sz) {
- int64_t low;
- int64_t high;
- bool valid = qt->getAsIntegerTerm(low, high);
- _intTerm.push_back(IntInfo(low, high, valid));
- }
- }
-}
-
-void IntFieldSearcher::onValue(const document::FieldValue & fv)
-{
- for(size_t j=0, jm(_intTerm.size()); j < jm; j++) {
- const IntInfo & ii = _intTerm[j];
- if (ii.valid() && (ii.cmp(fv.getAsLong()))) {
- addHit(*_qtl[j], 0);
- }
- }
- ++_words;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.h
deleted file mode 100644
index a2b17a87f4b..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "fieldsearcher.h"
-
-namespace vsm {
-
-class IntFieldSearcher : public FieldSearcher
-{
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- IntFieldSearcher(FieldIdT fId=0);
- ~IntFieldSearcher();
- void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override;
- void onValue(const document::FieldValue & fv) override;
-protected:
- class IntInfo
- {
- public:
- IntInfo(int64_t low, int64_t high, bool v) : _lower(low), _upper(high), _valid(v) { if (low > high) { _lower = high; _upper = low; } }
- bool cmp(int64_t key) const { return (_lower <= key) && (key <= _upper); }
- bool valid() const { return _valid; }
- private:
- int64_t _lower;
- int64_t _upper;
- bool _valid;
- };
- typedef std::vector<IntInfo> IntInfoListT;
- IntInfoListT _intTerm;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp
deleted file mode 100644
index 1c4ff78ff4a..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "strchrfieldsearcher.h"
-#include <vespa/document/fieldvalue/stringfieldvalue.h>
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-void StrChrFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf)
-{
- FieldSearcher::prepare(qtl, buf);
-}
-
-void StrChrFieldSearcher::onValue(const document::FieldValue & fv)
-{
- const document::LiteralFieldValueB & sfv = static_cast<const document::LiteralFieldValueB &>(fv);
- vespalib::stringref val = sfv.getValueRef();
- FieldRef fr(val.data(), std::min(maxFieldLength(), val.size()));
- matchDoc(fr);
-}
-
-bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef)
-{
- bool retval(true);
- if (_qtl.size() > 1) {
- size_t mintsz = shortestTerm();
- if (fieldRef.size() >= mintsz) {
- _words += matchTerms(fieldRef, mintsz);
- } else {
- _words += countWords(fieldRef);
- }
- } else {
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- if (fieldRef.size() >= qt.termLen()) {
- _words += matchTerm(fieldRef, qt);
- } else {
- _words += countWords(fieldRef);
- }
- }
- }
- return retval;
-}
-
-size_t StrChrFieldSearcher::shortestTerm() const
-{
- size_t mintsz(_qtl.front()->termLen());
- for(QueryTermList::const_iterator it=_qtl.begin()+1, mt=_qtl.end(); it != mt; it++) {
- const QueryTerm & qt = **it;
- mintsz = std::min(mintsz, qt.termLen());
- }
- return mintsz;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.h
deleted file mode 100644
index 0155c79cddf..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "fieldsearcher.h"
-
-namespace vsm {
-
-class StrChrFieldSearcher : public FieldSearcher
-{
-public:
- StrChrFieldSearcher() : FieldSearcher(0) { }
- StrChrFieldSearcher(FieldIdT fId) : FieldSearcher(fId) { }
- void onValue(const document::FieldValue & fv) override;
- void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override;
-private:
- size_t shortestTerm() const;
- bool matchDoc(const FieldRef & field);
- virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) = 0;
- virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) = 0;
-};
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp
deleted file mode 100644
index 977602a691c..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "utf8exactstringfieldsearcher.h"
-
-using search::byte;
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-UTF8ExactStringFieldSearcher::duplicate() const
-{
- return std::make_unique<UTF8ExactStringFieldSearcher>(*this);
-}
-
-size_t
-UTF8ExactStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
-{
- (void) mintsz;
- for (QueryTermList::iterator it = _qtl.begin(), mt = _qtl.end(); it != mt; ++it) {
- QueryTerm & qt = **it;
- matchTermExact(f, qt);
- }
- return 1;
-}
-
-size_t
-UTF8ExactStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt)
-{
- return matchTermExact(f, qt);
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
deleted file mode 100644
index 744974a6cf6..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h>
-
-namespace vsm
-{
-
-/**
- * This class does suffix utf8 searches.
- **/
-class UTF8ExactStringFieldSearcher : public UTF8StringFieldSearcherBase
-{
-protected:
- virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
- virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override;
-
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- UTF8ExactStringFieldSearcher() : UTF8StringFieldSearcherBase() { }
- UTF8ExactStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
deleted file mode 100644
index 9aef99f9fa1..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "utf8flexiblestringfieldsearcher.h"
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.searcher.utf8flexiblestringfieldsearcher");
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-UTF8FlexibleStringFieldSearcher::duplicate() const
-{
- return std::make_unique<UTF8FlexibleStringFieldSearcher>(*this);
-}
-
-size_t
-UTF8FlexibleStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
-{
- (void) mintsz;
- size_t words = 0;
- for (QueryTermList::iterator it = _qtl.begin(); it != _qtl.end(); ++it) {
- words = matchTerm(f, **it);
- }
- return words;
-}
-
-size_t
-UTF8FlexibleStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt)
-{
- if (qt.isPrefix()) {
- LOG(debug, "Use prefix match for prefix term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermRegular(f, qt);
- } else if (qt.isSubstring()) {
- LOG(debug, "Use substring match for substring term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermSubstring(f, qt);
- } else if (qt.isSuffix()) {
- LOG(debug, "Use suffix match for suffix term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermSuffix(f, qt);
- } else if (qt.isExactstring()) {
- LOG(debug, "Use exact match for exact term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermExact(f, qt);
- } else {
- if (substring()) {
- LOG(debug, "Use substring match for term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermSubstring(f, qt);
- } else if (suffix()) {
- LOG(debug, "Use suffix match for term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermSuffix(f, qt);
- } else if (exact()) {
- LOG(debug, "Use exact match for term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermExact(f, qt);
- } else {
- LOG(debug, "Use regular/prefix match for term '%s:%s'", qt.index().c_str(), qt.getTerm());
- return matchTermRegular(f, qt);
- }
- }
-}
-
-UTF8FlexibleStringFieldSearcher::UTF8FlexibleStringFieldSearcher() :
- UTF8StringFieldSearcherBase()
-{ }
-
-UTF8FlexibleStringFieldSearcher::UTF8FlexibleStringFieldSearcher(FieldIdT fId) :
- UTF8StringFieldSearcherBase(fId)
-{ }
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h
deleted file mode 100644
index 63931af0036..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h>
-
-namespace vsm
-{
-
-/**
- * This class does utf8 searches based on the query term type.
- * It will choose between regular search strategy (including prefix) and substring search strategy.
- **/
-class UTF8FlexibleStringFieldSearcher : public UTF8StringFieldSearcherBase
-{
-private:
- /**
- * Tries to match the given query term against the content of the given field reference.
- * Search strategy is choosen based on the query term type.
- **/
- virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
-
- /**
- * Tries to match each query term in the underlying query against the content of the given field reference.
- * Search strategy is choosen based on the query term type.
- **/
- virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override;
-
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- UTF8FlexibleStringFieldSearcher();
- UTF8FlexibleStringFieldSearcher(FieldIdT fId);
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
deleted file mode 100644
index 0d93009655c..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "utf8strchrfieldsearcher.h"
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-using search::byte;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-UTF8StrChrFieldSearcher::duplicate() const
-{
- return std::make_unique<UTF8StrChrFieldSearcher>(*this);
-}
-
-size_t
-UTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
-{
- (void) mintsz;
- termcount_t words(0);
- const byte * n = reinterpret_cast<const byte *> (f.data());
- const byte * e = n + f.size();
- if (f.size() >= _buf->size()) {
- _buf->reserve(f.size() + 1);
- }
- cmptype_t * fn = &(*_buf.get())[0];
- size_t fl(0);
-
- for( ; n < e; ) {
- if (!*n) { _zeroCount++; n++; }
- n = tokenize(n, _buf->capacity(), fn, fl);
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- if ((tsz <= fl) && (prefix() || qt.isPrefix() || (tsz == fl))) {
- const cmptype_t *tt=term, *et=term+tsz;
- for (const cmptype_t *fnt=fn; (tt < et) && (*tt == *fnt); tt++, fnt++);
- if (tt == et) {
- addHit(qt, words);
- }
- }
- }
- words++;
- }
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
- return words;
-}
-
-size_t
-UTF8StrChrFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt)
-{
- return matchTermRegular(f, qt);
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.h
deleted file mode 100644
index 1687a1a18c0..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "utf8stringfieldsearcherbase.h"
-
-namespace vsm {
-
-/**
- * This class does normal utf8 searches.
- * This class uses an highly optimized version of the tokenize method in fastlib.
- **/
-class UTF8StrChrFieldSearcher : public UTF8StringFieldSearcherBase
-{
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- UTF8StrChrFieldSearcher() : UTF8StringFieldSearcherBase() { }
- UTF8StrChrFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { }
-
-protected:
- size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
- size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
deleted file mode 100644
index 148cdf2c0c3..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
+++ /dev/null
@@ -1,320 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "utf8stringfieldsearcherbase.h"
-#include <cassert>
-
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-using search::byte;
-
-namespace vsm {
-
-const byte *
-UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * dstbuf, size_t & tokenlen)
-{
- if (maxSz > 0) {
- maxSz--;
- }
- ucs4_t c(*p);
- ucs4_t *q(dstbuf);
- const byte * end(p+maxSz);
-
- // Skip non-word characters between words
- for (; p < end; ) {
- if (c < 128) {
- if (!c) { break; }
- p++;
- if (__builtin_expect(_isWord[c], false)) {
- *q++ = _foldCase[c];
- c = 0;
- } else {
- c = *p;
- }
- } else {
- const byte * oldP(p);
- c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p);
- if (Fast_UnicodeUtil::IsWordChar(c)) {
- _utf8Count[p-oldP-1]++;
- const char *repl = ReplacementString(c);
- if (repl != NULL) {
- size_t repllen = strlen(repl);
- if (repllen > 0) {
- q = Fast_UnicodeUtil::ucs4copy(q,repl);
- }
- } else {
- c = ToFold(c);
- *q++ = c;
- }
- break;
- } else {
- if (c == _BadUTF8Char) {
- _badUtf8Count++;
- } else {
- _utf8Count[p-oldP-1]++;
- }
- c = *p;
- }
- }
- }
-
- c = *p; // Next char
- for (; p < end;) {
- if (c < 128) { // Common case, ASCII
- if (!c) { break; }
- p++;
- if (__builtin_expect(!_isWord[c], false)) {
- c = 0;
- } else {
- *q++ = _foldCase[c];
- c = *p;
- }
- } else {
- const byte * oldP(p);
- c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p);
- if (__builtin_expect(Fast_UnicodeUtil::IsWordChar(c), false)) {
- _utf8Count[p-oldP-1]++;
- const char *repl = ReplacementString(c);
- if (repl != NULL) {
- size_t repllen = strlen(repl);
- if (repllen > 0) {
- q = Fast_UnicodeUtil::ucs4copy(q,repl);
- }
- } else {
- c = ToFold(c);
- *q++ = c;
- }
-
- c = *p;
- } else {
- if (c == _BadUTF8Char) {
- _badUtf8Count++;
- } else {
- _utf8Count[p-oldP-1]++;
- }
- break;
- }
- }
- }
- *q = 0;
- tokenlen = q - dstbuf;
- return p;
-}
-
-size_t
-UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt)
-{
- termcount_t words(0);
- const byte * n = reinterpret_cast<const byte *> (f.data());
- // __builtin_prefetch(n, 0, 0);
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- const byte * e = n + f.size();
- if ( f.size() >= _buf->size()) {
- _buf->reserve(f.size() + 1);
- }
- cmptype_t * fn = &(*_buf.get())[0];
- size_t fl(0);
-
- for( ; n < e; ) {
- if (!*n) { _zeroCount++; n++; }
- n = tokenize(n, _buf->capacity(), fn, fl);
- if ((tsz <= fl) && (prefix() || qt.isPrefix() || (tsz == fl))) {
- const cmptype_t *tt=term, *et=term+tsz;
- for (const cmptype_t *fnt=fn; (tt < et) && (*tt == *fnt); tt++, fnt++);
- if (tt == et) {
- addHit(qt, words);
- }
- }
- words++;
- }
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
- return words;
-}
-
-size_t
-UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt)
-{
- const byte * n = reinterpret_cast<const byte *> (f.data());
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- const cmptype_t * eterm = term+tsz;
- const byte * e = n + f.size();
- if (tsz <= f.size()) {
- bool equal(true);
- for (; equal && (n < e) && (term < eterm); term++) {
- if (*term < 0x80) {
- equal = (*term == _foldCase[*n++]);
- } else {
- cmptype_t c = ToFold(Fast_UnicodeUtil::GetUTF8CharNonAscii(n));
- equal = (*term == c);
- }
- }
- if (equal && (term == eterm) && (qt.isPrefix() || (n == e))) {
- addHit(qt,0);
- }
- }
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
- return 1;
-}
-
-size_t
-UTF8StringFieldSearcherBase::matchTermSubstring(const FieldRef & f, QueryTerm & qt)
-{
- if (qt.termLen() == 0) { return 0; }
- const byte * n = reinterpret_cast<const byte *> (f.data());
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- if ( f.size() >= _buf->size()) {
- _buf->reserve(f.size() + 1);
- }
- cmptype_t * fntemp = &(*_buf.get())[0];
- BufferWrapper wrapper(fntemp);
- size_t fl = skipSeparators(n, f.size(), wrapper);
- const cmptype_t * fn(fntemp);
- const cmptype_t * fe = fn + fl;
- const cmptype_t * fre = fe - tsz;
- termcount_t words(0);
- for(words = 0; fn <= fre; ) {
- const cmptype_t *tt=term, *et=term+tsz, *fnt=fn;
- for (; (tt < et) && (*tt == *fnt); tt++, fnt++);
- if (tt == et) {
- fn = fnt;
- addHit(qt, words);
- } else {
- if ( ! Fast_UnicodeUtil::IsWordChar(*fn++) ) {
- words++;
- for(; (fn < fre) && ! Fast_UnicodeUtil::IsWordChar(*fn) ; fn++ );
- }
- }
- }
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
- return words + 1; // we must also count the last word
-}
-
-size_t
-UTF8StringFieldSearcherBase::matchTermSuffix(const FieldRef & f, QueryTerm & qt)
-{
- termcount_t words = 0;
- const byte * srcbuf = reinterpret_cast<const byte *> (f.data());
- const byte * srcend = srcbuf + f.size();
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- if (f.size() >= _buf->size()) {
- _buf->reserve(f.size() + 1);
- }
- cmptype_t * dstbuf = &(*_buf.get())[0];
- size_t tokenlen = 0;
-
- for( ; srcbuf < srcend; ) {
- if (*srcbuf == 0) {
- ++_zeroCount;
- ++srcbuf;
- }
- srcbuf = tokenize(srcbuf, _buf->capacity(), dstbuf, tokenlen);
- if (matchTermSuffix(term, tsz, dstbuf, tokenlen)) {
- addHit(qt, words);
- }
- words++;
- }
- return words;
-}
-
-UTF8StringFieldSearcherBase::UTF8StringFieldSearcherBase() :
- StrChrFieldSearcher(),
- Fast_NormalizeWordFolder(),
- Fast_UnicodeUtil()
-{
-}
-
-UTF8StringFieldSearcherBase::UTF8StringFieldSearcherBase(FieldIdT fId) :
- StrChrFieldSearcher(fId),
- Fast_NormalizeWordFolder(),
- Fast_UnicodeUtil()
-{
-}
-
-UTF8StringFieldSearcherBase::~UTF8StringFieldSearcherBase() {}
-
-void
-UTF8StringFieldSearcherBase::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf)
-{
- StrChrFieldSearcher::prepare(qtl, buf);
- _buf = buf;
-}
-
-bool
-UTF8StringFieldSearcherBase::matchTermSuffix(const cmptype_t * term, size_t termlen,
- const cmptype_t * word, size_t wordlen)
-{
- if ((termlen <= wordlen)) {
- const cmptype_t * titr = term + termlen - 1;
- const cmptype_t * witr = word + wordlen - 1;
- bool hit = true;
- // traverse the term and the word back to front
- for (; titr >= term; --titr, --witr) {
- if (*titr != *witr) {
- hit = false;
- break;
- }
- }
- return hit;
- }
- return false;
-}
-
-bool
-UTF8StringFieldSearcherBase::isSeparatorCharacter(ucs4_t c)
-{
- return ((c < 0x20) && (c != '\n') && (c != '\t'));
-}
-
-template <typename T>
-size_t
-UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T & dstbuf) {
- const search::byte * e(p+sz);
- const search::byte * b(p);
-
- for(; p < e; ) {
- ucs4_t c(*p);
- const search::byte * oldP(p);
- if (c < 128) {
- p++;
- if (!isSeparatorCharacter(c)) {
- dstbuf.onCharacter(_foldCase[c], (oldP - b));
- }
- } else {
- c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p);
- const char *repl = ReplacementString(c);
- if (repl != NULL) {
- size_t repllen = strlen(repl);
- if (repllen > 0) {
- ucs4_t * buf = dstbuf.getBuf();
- ucs4_t * newBuf = Fast_UnicodeUtil::ucs4copy(buf, repl);
- if (dstbuf.hasOffsets()) {
- for (; buf < newBuf; ++buf) {
- dstbuf.incBuf(1);
- dstbuf.onOffset(oldP - b);
- }
- } else {
- dstbuf.incBuf(newBuf - buf);
- }
- }
- } else {
- c = ToFold(c);
- dstbuf.onCharacter(c, (oldP - b));
- }
- if (c == _BadUTF8Char) {
- _badUtf8Count++;
- } else {
- _utf8Count[p-oldP-1]++;
- }
- }
- }
- assert(dstbuf.valid());
- return dstbuf.size();
-}
-
-template unsigned long UTF8StringFieldSearcherBase::skipSeparators<UTF8StringFieldSearcherBase::BufferWrapper>(unsigned char const*, unsigned long, UTF8StringFieldSearcherBase::BufferWrapper&);
-template unsigned long UTF8StringFieldSearcherBase::skipSeparators<UTF8StringFieldSearcherBase::OffsetWrapper>(unsigned char const*, unsigned long, UTF8StringFieldSearcherBase::OffsetWrapper&);
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
deleted file mode 100644
index f540a7ac457..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "strchrfieldsearcher.h"
-#include <vespa/fastlib/text/normwordfolder.h>
-
-namespace vsm {
-
-/**
- * This class is the base class for all utf8 string searchers.
- * It contains utility functions used by the other searchers.
- * As normal the prepare method is called
- * after the query is built. A SharedSearcherBuf is used given to it. This is a
- * buffer that is shared among all searchers that are run in the same context.
- * Reuse of this buffer ensures better cache hit ratio because this is just a
- * scratchpad for tokenizing. It will grow till the max size and stay there.
- **/
-class UTF8StringFieldSearcherBase : public StrChrFieldSearcher, protected Fast_NormalizeWordFolder, public Fast_UnicodeUtil
-{
-public:
- /**
- * Template class that wraps an ucs4 buffer.
- * Used when invoking skipSeparators() during substring matching.
- **/
- class BufferWrapper
- {
- protected:
- ucs4_t * _bbuf;
- ucs4_t * _cbuf;
-
- public:
- BufferWrapper(ucs4_t * buf) : _bbuf(buf), _cbuf(buf) { }
- BufferWrapper(ucs4_t * buf, size_t *) : _bbuf(buf), _cbuf(buf) { }
- void onCharacter(ucs4_t ch, size_t) { *_cbuf++ = ch; }
- void onOffset(size_t) { }
- void incBuf(size_t inc) { _cbuf += inc; }
- ucs4_t * getBuf() { return _cbuf; }
- bool valid() { return true; }
- size_t size() { return (_cbuf - _bbuf); }
- bool hasOffsets() { return false; }
- };
-
- /**
- * Template class that wraps an offset buffer in addition to an ucs4 buffer.
- * The offset buffer contains offsets into the original utf8 buffer.
- **/
- class OffsetWrapper : public BufferWrapper
- {
- private:
- size_t * _boff;
- size_t * _coff;
-
- public:
- OffsetWrapper(ucs4_t * buf, size_t * offsets) : BufferWrapper(buf), _boff(offsets), _coff(offsets) {}
- void onCharacter(ucs4_t ch, size_t of) { *_cbuf++ = ch; *_coff++ = of; }
- void onOffset(size_t of) { *_coff++ = of; }
- bool valid() { return (size() == (size_t)(_coff - _boff)); }
- bool hasOffsets() { return true; }
- };
-
-protected:
- SharedSearcherBuf _buf;
-
- const search::byte * tokenize(const search::byte * buf, size_t maxSz, cmptype_t * dstbuf, size_t & tokenlen);
-
- /**
- * Matches the given query term against the words in the given field reference
- * using exact or prefix match strategy.
- *
- * @param f the field reference to match against.
- * @param qt the query term trying to match.
- * @return the number of words in the field ref.
- **/
- size_t matchTermRegular(const FieldRef & f, search::streaming::QueryTerm & qt);
-
- /**
- * Matches the given query term against the characters in the given field reference
- * using substring match strategy.
- *
- * @param f the field reference to match against.
- * @param qt the query term trying to match.
- * @return the number of words in the field ref.
- **/
- size_t matchTermSubstring(const FieldRef & f, search::streaming::QueryTerm & qt);
-
- /**
- * Matches the given query term against the words in the given field reference
- * using suffix match strategy.
- *
- * @param f the field reference to match against.
- * @param qt the query term trying to match.
- * @return the number of words in the field ref.
- **/
- size_t matchTermSuffix(const FieldRef & f, search::streaming::QueryTerm & qt);
-
- /**
- * Matches the given query term against the words in the given field reference
- * using exact match strategy.
- *
- * @param f the field reference to match against.
- * @param qt the query term trying to match.
- * @return the number of words in the field ref.
- **/
- size_t matchTermExact(const FieldRef & f, search::streaming::QueryTerm & qt);
-
-public:
- UTF8StringFieldSearcherBase();
- UTF8StringFieldSearcherBase(FieldIdT fId);
- ~UTF8StringFieldSearcherBase();
- void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override;
- /**
- * Matches the given query term against the given word using suffix match strategy.
- *
- * @param term the buffer with the term.
- * @param termLen the length of the term.
- * @param word the buffer with the word.
- * @param wordlen the length of the word.
- * @return true if the term matches the word.
- **/
- static bool matchTermSuffix(const cmptype_t * term, size_t termlen,
- const cmptype_t * word, size_t wordlen);
-
- /**
- * Checks whether the given character is a separator character.
- **/
- static bool isSeparatorCharacter(ucs4_t);
-
- /**
- * Transforms the given utf8 array into an array of ucs4 characters.
- * Folding is performed. Separator characters are skipped.
- **/
- template <typename T>
- size_t skipSeparators(const search::byte * p, size_t sz, T & dstbuf);
-
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
deleted file mode 100644
index fd327d3a3df..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vsm/searcher/utf8substringsearcher.h>
-
-using search::byte;
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-UTF8SubStringFieldSearcher::duplicate() const
-{
- return std::make_unique<UTF8SubStringFieldSearcher>(*this);
-}
-
-size_t
-UTF8SubStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
-{
- const byte * n = reinterpret_cast<const byte *> (f.data());
- if ( f.size() >= _buf->size()) {
- _buf->reserve(f.size() + 1);
- }
- cmptype_t * fntemp = &(*_buf.get())[0];
- BufferWrapper wrapper(fntemp);
- size_t fl = skipSeparators(n, f.size(), wrapper);
- const cmptype_t * fn(fntemp);
- const cmptype_t * fe = fn + fl;
- const cmptype_t * fre = fe - mintsz;
- termcount_t words(0);
- for(words = 0; fn <= fre; ) {
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
-
- const cmptype_t *tt=term, *et=term+tsz, *fnt=fn;
- for (; (tt < et) && (*tt == *fnt); tt++, fnt++);
- if (tt == et) {
- addHit(qt, words);
- }
- }
- if ( ! Fast_UnicodeUtil::IsWordChar(*fn++) ) {
- words++;
- for(; (fn < fre) && ! Fast_UnicodeUtil::IsWordChar(*fn); fn++ );
- }
- }
-
- NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
- return words + 1; // we must also count the last word
-}
-
-size_t
-UTF8SubStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt)
-{
- return matchTermSubstring(f, qt);
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.h
deleted file mode 100644
index 1c463c28847..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h>
-
-namespace vsm {
-
-/**
- * This class does substring utf8 searches.
- **/
-class UTF8SubStringFieldSearcher : public UTF8StringFieldSearcherBase
-{
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- UTF8SubStringFieldSearcher() : UTF8StringFieldSearcherBase() { }
- UTF8SubStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { }
-protected:
- size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
- size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp
deleted file mode 100644
index be02a58cfda..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "utf8substringsnippetmodifier.h"
-#include <cassert>
-
-using search::byte;
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-UTF8SubstringSnippetModifier::duplicate() const
-{
- return std::make_unique<UTF8SubstringSnippetModifier>(*this);
-}
-
-size_t
-UTF8SubstringSnippetModifier::matchTerms(const FieldRef & f, const size_t mintsz)
-{
- _modified->reset();
- _readPtr = f.data();
- const byte * src = reinterpret_cast<const byte *> (f.data());
- // resize ucs4 buffer
- if (f.size() >= _buf->size()) {
- _buf->resize(f.size() + 1);
- }
- // resize offset buffers
- if (f.size() >= _offsets->size()) {
- _offsets->resize(f.size() + 1);
- }
- // resize modified buffer
- if (f.size() + 16 > _modified->getLength()) {
- _modified->resize(f.size() + 16); // make room for some unit separators
- }
- cmptype_t * dbegin = &(*_buf.get())[0];
- OffsetWrapper wrapper(dbegin, &(*_offsets)[0]);
- size_t numchars = skipSeparators(src, f.size(), wrapper);
- const cmptype_t * ditr = dbegin;
- const cmptype_t * dend = ditr + numchars;
- const cmptype_t * drend = dend - mintsz;
- termcount_t words = 0;
- for(; ditr <= drend; ) {
- for (QueryTermList::iterator itr = _qtl.begin(); itr != _qtl.end(); ++itr) {
- QueryTerm & qt = **itr;
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
-
- const cmptype_t * titr = term;
- const cmptype_t * tend = term + tsz;
- const cmptype_t * dtmp = ditr;
- for (; (titr < tend) && (*titr == *dtmp); ++titr, ++dtmp);
- if (titr == tend) {
- const char * mbegin = f.data() + (*_offsets)[ditr - dbegin];
- const char * mend = f.data() + ((dtmp < dend) ? ((*_offsets)[dtmp - dbegin]) : f.size());
- if (_readPtr <= mbegin) {
- // We will only copy from the field ref once.
- // If we have overlapping matches only the first one will be considered.
- insertSeparators(mbegin, mend);
- }
- addHit(qt, words);
- }
- }
- if ( ! Fast_UnicodeUtil::IsWordChar(*ditr++) ) {
- words++;
- for(; (ditr < drend) && ! Fast_UnicodeUtil::IsWordChar(*ditr) ; ++ditr );
- }
- }
- assert(_readPtr <= (f.data() + f.size()));
- // copy remaining
- size_t toCopy = f.size() - (_readPtr - f.data());
- copyToModified(toCopy);
-
- return words + 1; // we must also count the last word
-}
-
-size_t
-UTF8SubstringSnippetModifier::matchTerm(const FieldRef & f, QueryTerm & qt)
-{
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- return matchTerms(f, tsz);
-}
-
-void
-UTF8SubstringSnippetModifier::copyToModified(size_t n, bool skipSep)
-{
- if (n == 0) {
- return;
- }
- if (skipSep) {
- for (const char * readEnd = _readPtr + n; _readPtr < readEnd; ++_readPtr) {
- if (!isSeparatorCharacter(*_readPtr)) {
- _modified->put(*_readPtr);
- }
- }
- } else {
- _modified->put(_readPtr, n);
- _readPtr += n;
- }
-}
-
-void
-UTF8SubstringSnippetModifier::insertSeparators(const char * mbegin, const char * mend)
-{
- copyToModified(mbegin - _readPtr);
- _modified->put(_unitSep);
- // skip separators such that the match is not splitted.
- copyToModified((mend - mbegin), true);
- _modified->put(_unitSep);
-}
-
-UTF8SubstringSnippetModifier::UTF8SubstringSnippetModifier() :
- UTF8StringFieldSearcherBase(),
- _modified(new CharBuffer(32)),
- _offsets(new std::vector<size_t>(32)),
- _readPtr(NULL),
- _unitSep('\x1F')
-{
-}
-
-UTF8SubstringSnippetModifier::UTF8SubstringSnippetModifier(FieldIdT fId) :
- UTF8StringFieldSearcherBase(fId),
- _modified(new CharBuffer(32)),
- _offsets(new std::vector<size_t>(32)),
- _readPtr(NULL),
- _unitSep('\x1F')
-{
-}
-
-UTF8SubstringSnippetModifier::UTF8SubstringSnippetModifier(FieldIdT fId,
- const CharBuffer::SP & modBuf,
- const SharedOffsetBuffer & offBuf) :
- UTF8StringFieldSearcherBase(fId),
- _modified(modBuf),
- _offsets(offBuf),
- _readPtr(NULL),
- _unitSep('\x1F')
-{
-}
-
-UTF8SubstringSnippetModifier::~UTF8SubstringSnippetModifier() {}
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.h b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.h
deleted file mode 100644
index 0127a7f2827..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "utf8stringfieldsearcherbase.h"
-#include <vespa/vsm/common/charbuffer.h>
-
-namespace vsm {
-
-typedef std::shared_ptr<std::vector<size_t> > SharedOffsetBuffer;
-
-/**
- * This class does substring searches the same way as UTF8SubStringFieldSearcher.
- * While matching the query term(s) against the field reference it builds a modified
- * buffer based on the field reference where the only difference is that unit separators
- * are inserted before and after a match. These extra unit separators make it possible
- * to highlight a substring match when later generating snippets.
- **/
-class UTF8SubstringSnippetModifier : public UTF8StringFieldSearcherBase
-{
-private:
- CharBuffer::SP _modified; // buffer to write the modified field value
- SharedOffsetBuffer _offsets; // for each character in _buf we have an offset into the utf8 buffer (field reference)
- const char * _readPtr; // buffer to read from (field reference)
- char _unitSep; // the unit separator character to use
-
- virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
- virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override;
-
- /**
- * Copies n bytes from the field reference to the modified buffer and updates the read pointer.
- * Separator characters from the field reference can be skipped.
- * This is to avoid that a match is splitted by separator characters from the original field reference.
- *
- * @param n the number of bytes to copy.
- * @param skipSep whether we should skip separator characters from the field reference.
- **/
- void copyToModified(size_t n, bool skipSep = false);
-
- /**
- * Copies from the field reference to the modified buffer and inserts unit separators for a match
- * starting at mbegin (in the field reference) and ending at mend (in the field reference).
- * A unit separator is inserted before and after the match.
- *
- * @param mbegin the beginning of the match.
- * @param mend the end of the match.
- **/
- void insertSeparators(const char * mbegin, const char * mend);
-
-public:
- typedef std::shared_ptr<UTF8SubstringSnippetModifier> SP;
-
- std::unique_ptr<FieldSearcher> duplicate() const override;
-
- UTF8SubstringSnippetModifier();
- UTF8SubstringSnippetModifier(FieldIdT fId);
- ~UTF8SubstringSnippetModifier();
-
- /**
- * Creates a new instance.
- *
- * @param fId the field id to operate on.
- * @param modBuf the shared buffer used to store the modified field value.
- * @param offBuf the shared buffer used to store the offsets into the field reference.
- **/
- UTF8SubstringSnippetModifier(FieldIdT fId, const CharBuffer::SP & modBuf, const SharedOffsetBuffer & offBuf);
-
- const CharBuffer & getModifiedBuf() const { return *_modified; }
- const search::streaming::QueryTermList & getQueryTerms() const { return _qtl; }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.cpp
deleted file mode 100644
index 3495d46b85b..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "utf8suffixstringfieldsearcher.h"
-
-using search::byte;
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-
-namespace vsm {
-
-std::unique_ptr<FieldSearcher>
-UTF8SuffixStringFieldSearcher::duplicate() const
-{
- return std::make_unique<UTF8SuffixStringFieldSearcher>(*this);
-}
-
-size_t
-UTF8SuffixStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
-{
- (void) mintsz;
- termcount_t words = 0;
- const byte * srcbuf = reinterpret_cast<const byte *> (f.data());
- const byte * srcend = srcbuf + f.size();
- if (f.size() >= _buf->size()) {
- _buf->reserve(f.size() + 1);
- }
- cmptype_t * dstbuf = &(*_buf.get())[0];
- size_t tokenlen = 0;
-
- for( ; srcbuf < srcend; ) {
- if (*srcbuf == 0) {
- ++_zeroCount;
- ++srcbuf;
- }
- srcbuf = tokenize(srcbuf, _buf->capacity(), dstbuf, tokenlen);
- for (QueryTermList::iterator it = _qtl.begin(), mt = _qtl.end(); it != mt; ++it) {
- QueryTerm & qt = **it;
- const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- if (matchTermSuffix(term, tsz, dstbuf, tokenlen)) {
- addHit(qt, words);
- }
- }
- words++;
- }
- return words;
-}
-
-size_t
-UTF8SuffixStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt)
-{
- return matchTermSuffix(f, qt);
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
deleted file mode 100644
index 0640ac22da5..00000000000
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h>
-
-namespace vsm
-{
-
-/**
- * This class does suffix utf8 searches.
- **/
-class UTF8SuffixStringFieldSearcher : public UTF8StringFieldSearcherBase
-{
-protected:
- virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
- virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override;
-
-public:
- std::unique_ptr<FieldSearcher> duplicate() const override;
- UTF8SuffixStringFieldSearcher() : UTF8StringFieldSearcherBase() { }
- UTF8SuffixStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/.gitignore b/streamingvisitors/src/vespa/vsm/vsm/.gitignore
deleted file mode 100644
index 95bc02923a9..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*.exe
-*.ilk
-*.pdb
-.depend*
-Makefile
diff --git a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt
deleted file mode 100644
index adc00b341a3..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_library(vsm_vsmbase OBJECT
- SOURCES
- docsumfieldspec.cpp
- docsumfilter.cpp
- fieldsearchspec.cpp
- flattendocsumwriter.cpp
- slimefieldwriter.cpp
- snippetmodifier.cpp
- vsm-adapter.cpp
- docsumconfig.cpp
- DEPENDS
- vsm_vconfig
-)
diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.cpp
deleted file mode 100644
index 656e9eed132..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vsm/vsm/docsumconfig.h>
-#include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
-#include <vespa/searchsummary/docsummary/matched_elements_filter_dfw.h>
-#include <vespa/searchlib/common/matching_elements_fields.h>
-#include <vespa/vsm/config/config-vsmfields.h>
-#include <vespa/vsm/config/config-vsmsummary.h>
-
-using search::MatchingElementsFields;
-using search::docsummary::IDocsumFieldWriter;
-using search::docsummary::EmptyDFW;
-using search::docsummary::MatchedElementsFilterDFW;
-using search::docsummary::ResultConfig;
-using vespa::config::search::vsm::VsmfieldsConfig;
-using vespa::config::search::vsm::VsmsummaryConfig;
-
-namespace vsm {
-
-namespace {
-
-void populate_fields(MatchingElementsFields& fields, VsmfieldsConfig& fields_config, const vespalib::string& field_name)
-{
- vespalib::string prefix = field_name + ".";
- for (const auto& spec : fields_config.fieldspec) {
- if (spec.name.substr(0, prefix.size()) == prefix) {
- fields.add_mapping(field_name, spec.name);
- }
- if (spec.name == field_name) {
- fields.add_field(field_name);
- }
- }
-}
-
-}
-
-DynamicDocsumConfig::DynamicDocsumConfig(search::docsummary::IDocsumEnvironment* env, search::docsummary::DynamicDocsumWriter* writer, std::shared_ptr<VsmfieldsConfig> vsm_fields_config)
- : Parent(env, writer),
- _vsm_fields_config(std::move(vsm_fields_config))
-{
-}
-
-IDocsumFieldWriter::UP
-DynamicDocsumConfig::createFieldWriter(const string & fieldName, const string & overrideName, const string & argument, bool & rc, std::shared_ptr<search::MatchingElementsFields> matching_elems_fields)
-{
- IDocsumFieldWriter::UP fieldWriter;
- if ((overrideName == "staticrank") ||
- (overrideName == "ranklog") ||
- (overrideName == "label") ||
- (overrideName == "project") ||
- (overrideName == "positions") ||
- (overrideName == "absdist") ||
- (overrideName == "subproject"))
- {
- fieldWriter = std::make_unique<EmptyDFW>();
- rc = true;
- } else if ((overrideName == "attribute") ||
- (overrideName == "attributecombiner") ||
- (overrideName == "geopos")) {
- rc = true;
- } else if ((overrideName == "matchedattributeelementsfilter") ||
- (overrideName == "matchedelementsfilter")) {
- string source_field = argument.empty() ? fieldName : argument;
- const ResultConfig& resultConfig = getResultConfig();
- int source_field_enum = resultConfig.GetFieldNameEnum().Lookup(source_field.c_str());
- populate_fields(*matching_elems_fields, *_vsm_fields_config, source_field);
- fieldWriter = MatchedElementsFilterDFW::create(source_field, source_field_enum, matching_elems_fields);
- rc = static_cast<bool>(fieldWriter);
- } else {
- fieldWriter = search::docsummary::DynamicDocsumConfig::createFieldWriter(fieldName, overrideName, argument, rc, matching_elems_fields);
- }
- return fieldWriter;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.h b/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.h
deleted file mode 100644
index 11010c04e90..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/searchsummary/docsummary/docsumconfig.h>
-
-namespace vespa::config::search::vsm {
-namespace internal { class InternalVsmfieldsType; }
-typedef const internal::InternalVsmfieldsType VsmfieldsConfig;
-}
-namespace vsm {
-
-class DynamicDocsumConfig : public search::docsummary::DynamicDocsumConfig
-{
-public:
- using Parent = search::docsummary::DynamicDocsumConfig;
- using VsmfieldsConfig = vespa::config::search::vsm::VsmfieldsConfig;
-private:
- std::shared_ptr<VsmfieldsConfig> _vsm_fields_config;
-public:
- DynamicDocsumConfig(search::docsummary::IDocsumEnvironment* env, search::docsummary::DynamicDocsumWriter* writer, std::shared_ptr<VsmfieldsConfig> vsm_fields_config);
-private:
- std::unique_ptr<search::docsummary::IDocsumFieldWriter>
- createFieldWriter(const string & fieldName, const string & overrideName,
- const string & cf, bool & rc, std::shared_ptr<search::MatchingElementsFields> matching_elems_fields) override;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp
deleted file mode 100644
index 936aaaa2091..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "docsumfieldspec.h"
-
-namespace vsm {
-
-DocsumFieldSpec::FieldIdentifier::FieldIdentifier() :
- _id(StringFieldIdTMap::npos),
- _path()
-{ }
-
-DocsumFieldSpec::FieldIdentifier::FieldIdentifier(FieldIdT id, FieldPath path) :
- _id(id),
- _path(std::move(path))
-{ }
-
-DocsumFieldSpec::FieldIdentifier::FieldIdentifier(FieldIdentifier &&) noexcept = default;
-DocsumFieldSpec::FieldIdentifier & DocsumFieldSpec::FieldIdentifier::operator=(FieldIdentifier &&) noexcept = default;
-DocsumFieldSpec::FieldIdentifier::~FieldIdentifier() = default;
-
-DocsumFieldSpec::DocsumFieldSpec() :
- _resultType(search::docsummary::RES_INT),
- _command(VsmsummaryConfig::Fieldmap::Command::NONE),
- _outputField(),
- _inputFields()
-{ }
-
-DocsumFieldSpec::DocsumFieldSpec(search::docsummary::ResType resultType,
- VsmsummaryConfig::Fieldmap::Command command) :
- _resultType(resultType),
- _command(command),
- _outputField(),
- _inputFields()
-{ }
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h
deleted file mode 100644
index db6ee9fa223..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/searchsummary/docsummary/resultclass.h>
-#include <vespa/vsm/common/document.h>
-#include <vespa/vsm/common/storagedocument.h>
-#include <vespa/vsm/config/vsm-cfif.h>
-
-namespace vsm {
-
-/**
- * This class contains the specifications for how to generate a summary field.
- **/
-class DocsumFieldSpec {
-public:
- /**
- * This class contains a field id and a field path (to navigate a field value).
- **/
- class FieldIdentifier {
- private:
- FieldIdT _id;
- FieldPath _path;
-
- public:
- FieldIdentifier();
- FieldIdentifier(FieldIdT id, FieldPath path);
- FieldIdentifier(FieldIdentifier &&) noexcept;
- FieldIdentifier & operator=(FieldIdentifier &&) noexcept;
- FieldIdentifier(const FieldIdentifier &) = delete;
- FieldIdentifier & operator=(const FieldIdentifier &) = delete;
- ~FieldIdentifier();
- FieldIdT getId() const { return _id; }
- const FieldPath & getPath() const { return _path; }
- };
-
- typedef std::vector<FieldIdentifier> FieldIdentifierVector;
-
-private:
- search::docsummary::ResType _resultType;
- VsmsummaryConfig::Fieldmap::Command _command;
- FieldIdentifier _outputField;
- FieldIdentifierVector _inputFields;
-
-public:
- DocsumFieldSpec();
- DocsumFieldSpec(search::docsummary::ResType resultType, VsmsummaryConfig::Fieldmap::Command command);
-
- /**
- * Returns the result type for the summary field.
- **/
- search::docsummary::ResType getResultType() const { return _resultType; }
-
- /**
- * Returns the command specifying how to transform input fields into output summary field.
- **/
- VsmsummaryConfig::Fieldmap::Command getCommand() const { return _command; }
-
- /**
- * Returns whether the input field and output field are identical.
- **/
- bool hasIdentityMapping() const {
- return _inputFields.size() == 1 && _outputField.getId() == _inputFields[0].getId();
- }
-
- const FieldIdentifier & getOutputField() const { return _outputField; }
- void setOutputField(FieldIdentifier outputField) { _outputField = std::move(outputField); }
- const FieldIdentifierVector & getInputFields() const { return _inputFields; }
- FieldIdentifierVector & getInputFields() { return _inputFields; }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp
deleted file mode 100644
index 70759feb41c..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp
+++ /dev/null
@@ -1,477 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "docsumfilter.h"
-#include "slimefieldwriter.h"
-#include <vespa/searchsummary/docsummary/summaryfieldconverter.h>
-#include <vespa/document/base/exceptions.h>
-#include <vespa/document/fieldvalue/iteratorhandler.h>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.docsumfilter");
-
-using namespace search::docsummary;
-
-
-namespace {
-
-class Handler : public document::fieldvalue::IteratorHandler {
-public:
-};
-
-struct IntResultHandler : public Handler {
- int32_t value;
- IntResultHandler() : value(0) {}
- void onPrimitive(uint32_t, const Content & c) override {
- value = c.getValue().getAsInt();
- }
-};
-
-struct LongResultHandler : public Handler {
- int64_t value;
- LongResultHandler() : value(0) {}
- void onPrimitive(uint32_t, const Content & c) override {
- value = c.getValue().getAsLong();
- }
-};
-
-struct FloatResultHandler : public Handler {
- float value;
- FloatResultHandler() : value(0) {}
- void onPrimitive(uint32_t, const Content & c) override {
- value = c.getValue().getAsFloat();
- }
-};
-
-struct DoubleResultHandler : public Handler {
- double value;
- DoubleResultHandler() : value(0) {}
- void onPrimitive(uint32_t, const Content & c) override {
- value = c.getValue().getAsDouble();
- }
-};
-
-class StringResultHandler : public Handler {
-private:
- ResType _type;
- ResultPacker & _packer;
- void addToPacker(const char * buf, size_t len) {
- switch (_type) {
- case RES_STRING:
- _packer.AddString(buf, len);
- break;
- case RES_LONG_STRING:
- _packer.AddLongString(buf, len);
- break;
- default:
- break;
- }
- }
-
-public:
- StringResultHandler(ResType t, ResultPacker & p) : _type(t), _packer(p) {}
- void onPrimitive(uint32_t, const Content & c) override {
- const document::FieldValue & fv = c.getValue();
- if (fv.isLiteral()) {
- const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv);
- vespalib::stringref s = lfv.getValueRef();
- addToPacker(s.data(), s.size());
- } else {
- vespalib::string s = fv.toString();
- addToPacker(s.c_str(), s.size());
- }
- }
-};
-
-class RawResultHandler : public Handler {
-private:
- ResType _type;
- ResultPacker & _packer;
-
-public:
- RawResultHandler(ResType t, ResultPacker & p) : _type(t), _packer(p) {}
- void onPrimitive(uint32_t, const Content & c) override {
- const document::FieldValue & fv = c.getValue();
- try {
- std::pair<const char *, size_t> buf = fv.getAsRaw();
- if (buf.first != nullptr) {
- switch (_type) {
- case RES_DATA:
- _packer.AddData(buf.first, buf.second);
- break;
- case RES_LONG_DATA:
- _packer.AddLongData(buf.first, buf.second);
- break;
- default:
- break;
- }
- }
- } catch (document::InvalidDataTypeConversionException & e) {
- LOG(warning, "RawResultHandler: Could not get field value '%s' as raw. Skipping writing this field", fv.toString().c_str());
- _packer.AddEmpty();
- }
- }
-};
-
-
-}
-
-
-namespace vsm {
-
-FieldPath
-copyPathButFirst(const FieldPath & rhs) {
- // skip the element that correspond to the start field value
- FieldPath path;
- if ( ! rhs.empty()) {
- for (auto it = rhs.begin() + 1; it != rhs.end(); ++it) {
- path.push_back(std::make_unique<document::FieldPathEntry>(**it));
- }
- }
- return path;
-}
-
-void
-DocsumFilter::prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldSpec & toolsSpec,
- const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap)
-{
- { // setup output field
- const vespalib::string & name = toolsSpec.getOutputName();
- LOG(debug, "prepareFieldSpec: output field name '%s'", name.c_str());
- FieldIdT field = fieldMap.fieldNo(name);
- if (field != FieldMap::npos) {
- if (field < fieldPathMap.size()) {
- spec.setOutputField(DocsumFieldSpec::FieldIdentifier(field, copyPathButFirst(fieldPathMap[field])));
- } else {
- LOG(warning, "Could not find a field path for field '%s' with id '%d'", name.c_str(), field);
- spec.setOutputField(DocsumFieldSpec::FieldIdentifier(field, FieldPath()));
- }
- } else {
- LOG(warning, "Could not find output summary field '%s'", name.c_str());
- }
- }
- // setup input fields
- for (size_t i = 0; i < toolsSpec.getInputNames().size(); ++i) {
- const vespalib::string & name = toolsSpec.getInputNames()[i];
- LOG(debug, "prepareFieldSpec: input field name '%s'", name.c_str());
- FieldIdT field = fieldMap.fieldNo(name);
- if (field != FieldMap::npos) {
- if (field < fieldPathMap.size()) {
- LOG(debug, "field %u < map size %zu", field, fieldPathMap.size());
- spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier(field, copyPathButFirst(fieldPathMap[field])));
- } else {
- LOG(warning, "Could not find a field path for field '%s' with id '%d'", name.c_str(), field);
- spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier(field, FieldPath()));
- }
- if (_highestFieldNo <= field) {
- _highestFieldNo = field + 1;
- }
- } else {
- LOG(warning, "Could not find input summary field '%s'", name.c_str());
- }
- }
-}
-
-const document::FieldValue *
-DocsumFilter::getFieldValue(const DocsumFieldSpec::FieldIdentifier & fieldId,
- VsmsummaryConfig::Fieldmap::Command command,
- const Document & docsum, bool & modified)
-{
- FieldIdT fId = fieldId.getId();
- const document::FieldValue * fv = docsum.getField(fId);
- if (fv == nullptr) {
- return nullptr;
- }
- switch (command) {
- case VsmsummaryConfig::Fieldmap::Command::FLATTENJUNIPER:
- if (_snippetModifiers != nullptr) {
- FieldModifier * mod = _snippetModifiers->getModifier(fId);
- if (mod != nullptr) {
- _cachedValue = mod->modify(*fv, fieldId.getPath());
- modified = true;
- return _cachedValue.get();
- }
- }
- [[fallthrough]];
- default:
- return fv;
- }
-}
-
-
-DocsumFilter::DocsumFilter(const DocsumToolsPtr &tools, const IDocSumCache & docsumCache) :
- _docsumCache(&docsumCache),
- _tools(tools),
- _fields(),
- _highestFieldNo(0),
- _packer(tools ? tools->getResultConfig() : nullptr),
- _flattenWriter(),
- _snippetModifiers(nullptr),
- _cachedValue(),
- _emptyFieldPath()
-{ }
-
-DocsumFilter::~DocsumFilter() =default;
-
-void DocsumFilter::init(const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap)
-{
- if (_tools.get()) {
- const ResultClass *resClass = _tools->getResultClass();
- const std::vector<DocsumTools::FieldSpec> & inputSpecs = _tools->getFieldSpecs();
- if (resClass != nullptr) {
- uint32_t entryCnt = resClass->GetNumEntries();
- assert(entryCnt == inputSpecs.size());
- for (uint32_t i = 0; i < entryCnt; ++i) {
- const ResConfigEntry &entry = *resClass->GetEntry(i);
- const DocsumTools::FieldSpec & toolsSpec = inputSpecs[i];
- _fields.push_back(DocsumFieldSpec(entry._type, toolsSpec.getCommand()));
- LOG(debug, "About to prepare field spec for summary field '%s'", entry._bindname.c_str());
- prepareFieldSpec(_fields.back(), toolsSpec, fieldMap, fieldPathMap);
- }
- assert(entryCnt == _fields.size());
- }
- }
-}
-
-uint32_t
-DocsumFilter::getNumDocs() const
-{
- return std::numeric_limits<uint32_t>::max();
-}
-
-void
-DocsumFilter::writeField(const document::FieldValue & fv, const FieldPath & path, ResType type, ResultPacker & packer)
-{
- switch (type) {
- case RES_INT: {
- IntResultHandler rh;
- fv.iterateNested(path, rh);
- uint32_t val = rh.value;
- packer.AddInteger(val);
- break; }
- case RES_SHORT: {
- IntResultHandler rh;
- fv.iterateNested(path, rh);
- uint16_t val = rh.value;
- packer.AddShort(val);
- break; }
- case RES_BYTE: {
- IntResultHandler rh;
- fv.iterateNested(path, rh);
- uint8_t val = rh.value;
- packer.AddByte(val);
- break; }
- case RES_BOOL: {
- IntResultHandler rh;
- fv.iterateNested(path, rh);
- uint8_t val = rh.value;
- packer.AddByte(val);
- break; }
- case RES_FLOAT: {
- FloatResultHandler rh;
- fv.iterateNested(path, rh);
- float val = rh.value;
- packer.AddFloat(val);
- break; }
- case RES_DOUBLE: {
- DoubleResultHandler rh;
- fv.iterateNested(path, rh);
- double val = rh.value;
- packer.AddDouble(val);
- break; }
- case RES_INT64: {
- LongResultHandler rh;
- fv.iterateNested(path, rh);
- uint64_t val = rh.value;
- packer.AddInt64(val);
- break; }
- case RES_STRING:
- case RES_LONG_STRING:
- {
- StringResultHandler rh(type, packer);
- // the string result handler adds the result to the packer
- fv.iterateNested(path, rh);
- }
- break;
- case RES_DATA:
- case RES_LONG_DATA:
- {
- RawResultHandler rh(type, packer);
- // the raw result handler adds the result to the packer
- fv.iterateNested(path, rh);
- }
- break;
- default:
- LOG(warning, "Unknown docsum field type: %s", ResultConfig::GetResTypeName(type));
- packer.AddEmpty(); // unhandled output type
- break;
- }
-}
-
-
-void
-DocsumFilter::writeSlimeField(const DocsumFieldSpec & fieldSpec,
- const Document & docsum,
- ResultPacker & packer)
-{
- if (fieldSpec.getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) {
- const DocsumFieldSpec::FieldIdentifier & fieldId = fieldSpec.getOutputField();
- const document::FieldValue * fv = docsum.getField(fieldId.getId());
- if (fv != nullptr) {
- LOG(debug, "writeSlimeField: About to write field '%d' as Slime: field value = '%s'",
- fieldId.getId(), fv->toString().c_str());
- SlimeFieldWriter writer;
- if (! fieldSpec.hasIdentityMapping()) {
- writer.setInputFields(fieldSpec.getInputFields());
- }
- writer.convert(*fv);
- const vespalib::stringref out = writer.out();
- packer.AddLongString(out.data(), out.size());
- } else {
- LOG(debug, "writeSlimeField: Field value not set for field '%d'", fieldId.getId());
- packer.AddEmpty();
- }
- } else {
- LOG(debug, "writeSlimeField: Cannot handle this command");
- packer.AddEmpty();
- }
-}
-
-void
-DocsumFilter::writeFlattenField(const DocsumFieldSpec & fieldSpec,
- const Document & docsum,
- ResultPacker & packer)
-{
- if (fieldSpec.getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) {
- LOG(debug, "writeFlattenField: Cannot handle command NONE");
- packer.AddEmpty();
- return;
- }
-
- if (fieldSpec.getResultType() != RES_LONG_STRING &&
- fieldSpec.getResultType() != RES_STRING)
- {
- LOG(debug, "writeFlattenField: Can only handle result types STRING and LONG_STRING");
- packer.AddEmpty();
- return;
- }
-
- switch (fieldSpec.getCommand()) {
- case VsmsummaryConfig::Fieldmap::Command::FLATTENJUNIPER:
- _flattenWriter.setSeparator("\x1E"); // record separator (same as juniper uses)
- break;
- default:
- break;
- }
- const DocsumFieldSpec::FieldIdentifierVector & inputFields = fieldSpec.getInputFields();
- for (size_t i = 0; i < inputFields.size(); ++i) {
- const DocsumFieldSpec::FieldIdentifier & fieldId = inputFields[i];
- bool modified = false;
- const document::FieldValue * fv = getFieldValue(fieldId, fieldSpec.getCommand(), docsum, modified);
- if (fv != nullptr) {
- LOG(debug, "writeFlattenField: About to flatten field '%d' with field value (%s) '%s'",
- fieldId.getId(), modified ? "modified" : "original", fv->toString().c_str());
- if (modified) {
- fv->iterateNested(_emptyFieldPath, _flattenWriter);
- } else {
- fv->iterateNested(fieldId.getPath(), _flattenWriter);
- }
- } else {
- LOG(debug, "writeFlattenField: Field value not set for field '%d'", fieldId.getId());
- }
- }
-
- const CharBuffer & buf = _flattenWriter.getResult();
- switch (fieldSpec.getResultType()) {
- case RES_STRING:
- packer.AddString(buf.getBuffer(), buf.getPos());
- break;
- case RES_LONG_STRING:
- packer.AddLongString(buf.getBuffer(), buf.getPos());
- break;
- default:
- break;
- }
- _flattenWriter.clear();
-}
-
-
-void
-DocsumFilter::writeEmpty(ResType type, ResultPacker & packer)
-{
- // use the 'notdefined' values when writing numeric values
- switch (type) {
- case RES_INT:
- packer.AddInteger(std::numeric_limits<int32_t>::min());
- break;
- case RES_SHORT:
- packer.AddShort(std::numeric_limits<int16_t>::min());
- break;
- case RES_BYTE:
- packer.AddByte(0); // byte fields are unsigned so we have no 'notdefined' value.
- break;
- case RES_FLOAT:
- packer.AddFloat(std::numeric_limits<float>::quiet_NaN());
- break;
- case RES_DOUBLE:
- packer.AddDouble(std::numeric_limits<double>::quiet_NaN());
- break;
- case RES_INT64:
- packer.AddInt64(std::numeric_limits<int64_t>::min());
- break;
- default:
- packer.AddEmpty();
- break;
- }
-}
-
-uint32_t
-DocsumFilter::getSummaryClassId() const
-{
- return _tools->getResultClass() ? _tools->getResultClass()->GetClassID() : ResultConfig::NoClassID();
-}
-
-DocsumStoreValue
-DocsumFilter::getMappedDocsum(uint32_t id)
-{
- const ResultClass *resClass = _tools->getResultClass();
- if (resClass == nullptr) {
- return DocsumStoreValue(nullptr, 0);
- }
-
- const Document & doc = _docsumCache->getDocSum(id);
-
- _packer.Init(resClass->GetClassID());
- for (FieldSpecList::iterator it(_fields.begin()), end = _fields.end(); it != end; ++it) {
- ResType type = it->getResultType();
- if (type == RES_JSONSTRING) {
- // this really means 'structured data'
- writeSlimeField(*it, doc, _packer);
- } else {
- if (it->getInputFields().size() == 1 && it->getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) {
- const DocsumFieldSpec::FieldIdentifier & fieldId = it->getInputFields()[0];
- const document::FieldValue * field = doc.getField(fieldId.getId());
- if (field != nullptr) {
- writeField(*field, fieldId.getPath(), type, _packer);
- } else {
- writeEmpty(type, _packer); // void input
- }
- } else if (it->getInputFields().size() == 0 && it->getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) {
- LOG(spam, "0 inputfields for output field %u", it->getOutputField().getId());
- writeEmpty(type, _packer); // no input
- } else {
- writeFlattenField(*it, doc, _packer);
- }
- }
- }
-
- const char *buf;
- uint32_t buflen;
- bool ok = _packer.GetDocsumBlob(&buf, &buflen);
- if (ok) {
- return DocsumStoreValue(buf, buflen);
- } else {
- return DocsumStoreValue(nullptr, 0);
- }
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.h b/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.h
deleted file mode 100644
index e6f7ae3e6fe..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vsm/common/docsum.h>
-#include <vespa/vsm/common/fieldmodifier.h>
-#include <vespa/vsm/vsm/docsumfieldspec.h>
-#include <vespa/vsm/vsm/fieldsearchspec.h>
-#include <vespa/vsm/vsm/flattendocsumwriter.h>
-#include <vespa/vsm/vsm/vsm-adapter.h>
-#include <vespa/searchsummary/docsummary/resultpacker.h>
-#include <vespa/searchsummary/docsummary/docsumstore.h>
-
-using search::docsummary::IDocsumStore;
-using search::docsummary::DocsumStoreValue;
-using search::docsummary::ResType;
-using search::docsummary::ResultPacker;
-
-namespace vsm {
-
-/**
- * This class implements the IDocsumStore interface such that docsum blobs
- * can be fetched based on local document id. The docsum blobs are generated
- * on the fly when requested.
- **/
-class DocsumFilter : public IDocsumStore
-{
-private:
- typedef std::vector<DocsumFieldSpec> FieldSpecList; // list of summary field specs
- typedef std::vector<vespalib::string> StringList;
- typedef StringFieldIdTMap FieldMap;
-
- const IDocSumCache * _docsumCache;
- DocsumToolsPtr _tools;
- FieldSpecList _fields; // list of summary fields to generate
- size_t _highestFieldNo;
- ResultPacker _packer;
- FlattenDocsumWriter _flattenWriter;
- const FieldModifierMap * _snippetModifiers;
- document::FieldValue::UP _cachedValue;
- document::FieldPath _emptyFieldPath;
-
- DocsumFilter(const DocsumFilter &);
- DocsumFilter &operator=(const DocsumFilter &);
- void prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldSpec & toolsSpec,
- const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap);
- const document::FieldValue * getFieldValue(const DocsumFieldSpec::FieldIdentifier & fieldId,
- VsmsummaryConfig::Fieldmap::Command command,
- const Document & docsum, bool & modified);
- void writeField(const document::FieldValue & fv, const FieldPath & path, ResType type, ResultPacker & packer);
- void writeSlimeField(const DocsumFieldSpec & fieldSpec, const Document & docsum, ResultPacker & packer);
- void writeFlattenField(const DocsumFieldSpec & fieldSpec, const Document & docsum, ResultPacker & packer);
- void writeEmpty(ResType type, ResultPacker & packer);
-
-public:
- DocsumFilter(const DocsumToolsPtr & tools, const IDocSumCache & docsumCache);
- ~DocsumFilter() override;
- const DocsumToolsPtr & getTools() const { return _tools; }
-
- /**
- * Initializes this docsum filter using the given field map and field path map.
- * The field map is used to map from field name to field id.
- * The field path map is used to retrieve the field path for each input field.
- *
- * @param fieldMap maps from field name -> field id
- * @param fieldPathMap maps from field id -> field path
- **/
- void init(const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap);
-
- /**
- * Sets the snippet modifiers to use when writing string fields used as input to snippet generation.
- **/
- void setSnippetModifiers(const FieldModifierMap & modifiers) { _snippetModifiers = &modifiers; }
-
- /**
- * Returns the highest field id + 1 among all fields in the field spec list.
- **/
- size_t getHighestFieldNo() const { return _highestFieldNo; }
-
-
- void setDocSumStore(const IDocSumCache & docsumCache) { _docsumCache = &docsumCache; }
-
- // Inherit doc from IDocsumStore
- DocsumStoreValue getMappedDocsum(uint32_t id) override;
- uint32_t getNumDocs() const override;
- uint32_t getSummaryClassId() const override;
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
deleted file mode 100644
index 7043e63ec87..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ /dev/null
@@ -1,334 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "fieldsearchspec.h"
-#include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h>
-#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h>
-#include <vespa/vsm/searcher/utf8substringsearcher.h>
-#include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h>
-#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h>
-#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h>
-#include <vespa/vsm/searcher/intfieldsearcher.h>
-#include <vespa/vsm/searcher/boolfieldsearcher.h>
-#include <vespa/vsm/searcher/floatfieldsearcher.h>
-#include <vespa/vsm/searcher/geo_pos_field_searcher.h>
-#include <vespa/vespalib/stllike/asciistream.h>
-#include <regex>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.fieldsearchspec");
-
-#define DEBUGMASK 0x01
-
-using search::streaming::ConstQueryTermList;
-using search::streaming::Query;
-using search::streaming::QueryTerm;
-
-namespace vsm {
-
-namespace {
-
-void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) {
- if (arg1 == "prefix") {
- searcher->setMatchType(FieldSearcher::PREFIX);
- } else if (arg1 == "substring") {
- searcher->setMatchType(FieldSearcher::SUBSTRING);
- } else if (arg1 == "suffix") {
- searcher->setMatchType(FieldSearcher::SUFFIX);
- } else if (arg1 == "exact") {
- searcher->setMatchType(FieldSearcher::EXACT);
- } else if (arg1 == "word") {
- searcher->setMatchType(FieldSearcher::EXACT);
- }
-}
-
-}
-
-FieldSearchSpec::FieldSearchSpec() :
- _id(0),
- _name(),
- _maxLength(0x100000),
- _searcher(),
- _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE),
- _arg1(),
- _reconfigured(false)
-{
-}
-FieldSearchSpec::~FieldSearchSpec() = default;
-
-FieldSearchSpec::FieldSearchSpec(FieldSearchSpec&& rhs) noexcept = default;
-FieldSearchSpec& FieldSearchSpec::operator=(FieldSearchSpec&& rhs) noexcept = default;
-
-FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & fname,
- VsmfieldsConfig::Fieldspec::Searchmethod searchDef,
- const vespalib::string & arg1, size_t maxLength_) :
- _id(fid),
- _name(fname),
- _maxLength(maxLength_),
- _searcher(),
- _searchMethod(searchDef),
- _arg1(arg1),
- _reconfigured(false)
-{
- switch(searchDef) {
- default:
- LOG(warning, "Unknown searchdef = %d. Defaulting to AUTOUTF8", static_cast<int>(searchDef));
- [[fallthrough]];
- case VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::NONE:
- case VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::UTF8:
- if (arg1 == "substring") {
- _searcher = std::make_unique<UTF8SubStringFieldSearcher>(fid);
- } else if (arg1 == "suffix") {
- _searcher = std::make_unique<UTF8SuffixStringFieldSearcher>(fid);
- } else if (arg1 == "exact") {
- _searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
- } else if (arg1 == "word") {
- _searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
- } else if (searchDef == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) {
- _searcher = std::make_unique<UTF8StrChrFieldSearcher>(fid);
- } else {
- _searcher = std::make_unique<FUTF8StrChrFieldSearcher>(fid);
- }
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::BOOL:
- _searcher = std::make_unique<BoolFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT16:
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT32:
- case VsmfieldsConfig::Fieldspec::Searchmethod::INT64:
- _searcher = std::make_unique<IntFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::FLOAT:
- _searcher = std::make_unique<FloatFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::DOUBLE:
- _searcher = std::make_unique<DoubleFieldSearcher>(fid);
- break;
- case VsmfieldsConfig::Fieldspec::Searchmethod::GEOPOS:
- _searcher = std::make_unique<GeoPosFieldSearcher>(fid);
- break;
- }
- if (_searcher) {
- setMatchType(_searcher, arg1);
- _searcher->maxFieldLength(maxLength());
- }
-}
-
-void
-FieldSearchSpec::reconfig(const QueryTerm & term)
-{
- if (_reconfigured) {
- return;
- }
- switch (_searchMethod) {
- case VsmfieldsConfig::Fieldspec::Searchmethod::NONE:
- case VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::UTF8:
- case VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8:
- if ((term.isSubstring() && _arg1 != "substring") ||
- (term.isSuffix() && _arg1 != "suffix") ||
- (term.isExactstring() && _arg1 != "exact") ||
- (term.isPrefix() && _arg1 == "suffix"))
- {
- _searcher = std::make_unique<UTF8FlexibleStringFieldSearcher>(id());
- // preserve the basic match property of the searcher
- setMatchType(_searcher, _arg1);
- LOG(debug, "Reconfigured to use UTF8FlexibleStringFieldSearcher (%s) for field '%s' with id '%d'",
- _searcher->prefix() ? "prefix" : "regular", name().c_str(), id());
- _reconfigured = true;
- }
- break;
- default:
- break;
- }
-}
-
-vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f)
-{
- os << f._id << ' ' << f._name << ' ';
- if ( ! f._searcher) {
- os << " No searcher defined.\n";
- }
- return os;
-}
-
-FieldSearchSpecMap::FieldSearchSpecMap() = default;
-
-FieldSearchSpecMap::~FieldSearchSpecMap() = default;
-
-namespace {
- const std::string _G_empty("");
- const std::string _G_value(".value");
- const std::regex _G_map1("\\{[a-zA-Z0-9]+\\}");
- const std::regex _G_map2("\\{\".*\"\\}");
- const std::regex _G_array("\\[[0-9]+\\]");
-}
-
-vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & rawIndex)
-{
- if ((rawIndex.find('[') != vespalib::string::npos) || (rawIndex.find('{') != vespalib::string::npos)) {
- std::string index = std::regex_replace(std::string(rawIndex), _G_map1, _G_value);
- index = std::regex_replace(index, _G_map2, _G_value);
- index = std::regex_replace(index, _G_array, _G_empty);
- return index;
- }
- return rawIndex;
-}
-
-bool FieldSearchSpecMap::buildFieldsInQuery(const Query & query, StringFieldIdTMap & fieldsInQuery) const
-{
- bool retval(true);
- ConstQueryTermList qtl;
- query.getLeafs(qtl);
-
- for (const auto & term : qtl) {
- for (const auto & dtm : documentTypeMap()) {
- const IndexFieldMapT & fim = dtm.second;
- vespalib::string rawIndex(term->index());
- vespalib::string index(stripNonFields(rawIndex));
- IndexFieldMapT::const_iterator fIt = fim.find(index);
- if (fIt != fim.end()) {
- for(FieldIdT fid : fIt->second) {
- const FieldSearchSpec & spec = specMap().find(fid)->second;
- LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.c_str(), index.c_str());
- if ((rawIndex != index) && (spec.name().find(index) == 0)) {
- vespalib::string modIndex(rawIndex);
- modIndex.append(spec.name().substr(index.size()));
- fieldsInQuery.add(modIndex, spec.id());
- } else {
- fieldsInQuery.add(spec.name(),spec.id());
- }
- }
- } else {
- LOG(warning, "No valid indexes registered for index %s", term->index().c_str());
- retval = false;
- }
- }
- }
- return retval;
-}
-
-void FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded)
-{
- for(size_t i(0), m(otherFieldsNeeded.size()); i < m; i++) {
- LOG(debug, "otherFieldsNeeded[%zd] = '%s'", i, otherFieldsNeeded[i].c_str());
- _nameIdMap.add(otherFieldsNeeded[i]);
- }
-}
-
-namespace {
-
-FieldIdTList
-buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const FieldSearchSpecMapT & specMap,
- const VsmfieldsConfig::Documenttype::IndexVector & indexes)
-{
- LOG(spam, "Index %s with %zd fields", ci.name.c_str(), ci.field.size());
- FieldIdTList ifm;
- for (const VsmfieldsConfig::Documenttype::Index::Field & cf : ci.field) {
- LOG(spam, "Parsing field %s", cf.name.c_str());
- auto foundIndex = std::find_if(indexes.begin(), indexes.end(),
- [&cf](const auto & v) { return v.name == cf.name;});
- if ((foundIndex != indexes.end()) && (cf.name != ci.name)) {
- FieldIdTList sub = buildFieldSet(*foundIndex, specMap, indexes);
- ifm.insert(ifm.end(), sub.begin(), sub.end());
- } else {
- auto foundField = std::find_if(specMap.begin(), specMap.end(),
- [&cf](const auto & v) { return v.second.name() == cf.name;} );
- if (foundField != specMap.end()) {
- ifm.push_back(foundField->second.id());
- } else {
- LOG(warning, "Field %s not defined. Ignoring....", cf.name.c_str());
- }
- }
- }
- return ifm;
-}
-
-}
-
-bool FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
-{
- bool retval(true);
- LOG(spam, "Parsing %zd fields", conf->fieldspec.size());
- for(const VsmfieldsConfig::Fieldspec & cfs : conf->fieldspec) {
- LOG(spam, "Parsing %s", cfs.name.c_str());
- FieldIdT fieldId = specMap().size();
- FieldSearchSpec fss(fieldId, cfs.name, cfs.searchmethod, cfs.arg1.c_str(), cfs.maxlength);
- _specMap[fieldId] = std::move(fss);
- _nameIdMap.add(cfs.name, fieldId);
- LOG(spam, "M in %d = %s", fieldId, cfs.name.c_str());
- }
-
- LOG(spam, "Parsing %zd document types", conf->documenttype.size());
- for(const VsmfieldsConfig::Documenttype & di : conf->documenttype) {
- IndexFieldMapT indexMapp;
- LOG(spam, "Parsing document type %s with %zd indexes", di.name.c_str(), di.index.size());
- for(const VsmfieldsConfig::Documenttype::Index & ci : di.index) {
- indexMapp[ci.name] = buildFieldSet(ci, specMap(), di.index);
- }
- _documentTypeMap[di.name] = indexMapp;
- }
- return retval;
-}
-
-void
-FieldSearchSpecMap::reconfigFromQuery(const Query & query)
-{
- ConstQueryTermList qtl;
- query.getLeafs(qtl);
-
- for (const auto & termA : qtl) {
- for (const auto & ifm : documentTypeMap()) {
- IndexFieldMapT::const_iterator itc = ifm.second.find(termA->index());
- if (itc != ifm.second.end()) {
- for (FieldIdT fid : itc->second) {
- FieldSearchSpec & spec = _specMap.find(fid)->second;
- spec.reconfig(*termA);
- }
- }
- }
- }
-}
-
-bool lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b)
-{
- return a->field() < b->field();
-}
-
-void FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap)
-{
- fieldSearcherMap.clear();
- for (const auto & entry : fieldsInQuery) {
- FieldIdT fId = entry.second;
- const FieldSearchSpec & spec = specMap().find(fId)->second;
- fieldSearcherMap.emplace_back(spec.searcher().duplicate());
- }
- std::sort(fieldSearcherMap.begin(), fieldSearcherMap.end(), lesserField);
-}
-
-
-vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df)
-{
- os << "DocumentTypeMap = \n";
- for (const auto & dtm : df.documentTypeMap()) {
- os << "DocType = " << dtm.first << "\n";
- os << "IndexMap = \n";
- for (const auto &index : dtm.second) {
- os << index.first << ": ";
- for (FieldIdT fid : index.second) {
- os << fid << ' ';
- }
- os << '\n';
- }
- }
- os << "SpecMap = \n";
- for (const auto & entry : df.specMap()) {
- os << entry.first << " = " << entry.second << '\n';
- }
- os << "NameIdMap = \n" << df.nameIdMap();
- return os;
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
deleted file mode 100644
index 7b78a8634e0..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/vsm/searcher/fieldsearcher.h>
-#include <vespa/vsm/config/vsm-cfif.h>
-
-namespace vsm {
-
-class FieldSearchSpec
-{
-public:
- FieldSearchSpec();
- FieldSearchSpec(const FieldIdT & id, const vespalib::string & name,
- VsmfieldsConfig::Fieldspec::Searchmethod searchMethod,
- const vespalib::string & arg1, size_t maxLength);
- ~FieldSearchSpec();
- FieldSearchSpec(FieldSearchSpec&& rhs) noexcept;
- FieldSearchSpec& operator=(FieldSearchSpec&& rhs) noexcept;
- const FieldSearcher & searcher() const { return *_searcher; }
- const vespalib::string & name() const { return _name; }
- FieldIdT id() const { return _id; }
- bool valid() const { return static_cast<bool>(_searcher); }
- size_t maxLength() const { return _maxLength; }
-
- /**
- * Reconfigures the field searcher based on information in the given query term.
- **/
- void reconfig(const search::streaming::QueryTerm & term);
-
- friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f);
-
-private:
- FieldIdT _id;
- vespalib::string _name;
- size_t _maxLength;
- FieldSearcherContainer _searcher;
- VsmfieldsConfig::Fieldspec::Searchmethod _searchMethod;
- vespalib::string _arg1;
- bool _reconfigured;
-};
-
-typedef std::map<FieldIdT, FieldSearchSpec> FieldSearchSpecMapT;
-
-class FieldSearchSpecMap
-{
-public:
- FieldSearchSpecMap();
- ~FieldSearchSpecMap();
-
- /**
- * Iterates over all fields in the vsmfields config and creates a mapping from field id to FieldSearchSpec objects
- * and a mapping from field name to field id. It then iterates over all document types and index names
- * and creates a mapping from index name to list of field ids for each document type.
- **/
- bool buildFromConfig(const VsmfieldsHandle & conf);
-
- /**
- * Iterates over the given field name vector adding extra elements to the mapping from field name to field id.
- **/
- void buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded);
-
- /**
- * Reconfigures some of the field searchers based on information in the given query.
- **/
- void reconfigFromQuery(const search::streaming::Query & query);
-
- /**
- * Adds a [field name, field id] entry to the given mapping for each field name used in the given query.
- * This is achieved by mapping from query term index name -> list of field ids -> [field name, field id] pairs.
- **/
- bool buildFieldsInQuery(const search::streaming::Query & query, StringFieldIdTMap & fieldsInQuery) const;
-
- /**
- * Adds a [field name, field id] entry to the given mapping for each field name in the given vector.
- **/
- void buildFieldsInQuery(const std::vector<vespalib::string> & otherFieldsNeeded, StringFieldIdTMap & fieldsInQuery) const;
-
- /**
- * Adds a FieldSearcher object to the given field searcher map for each field name in the other map.
- **/
- void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap);
-
- const FieldSearchSpecMapT & specMap() const { return _specMap; }
- //const IndexFieldMapT & indexMap() const { return _documentTypeMap.begin()->second; }
- const DocumentTypeIndexFieldMapT & documentTypeMap() const { return _documentTypeMap; }
- const StringFieldIdTMap & nameIdMap() const { return _nameIdMap; }
- friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & f);
-
- static vespalib::string stripNonFields(const vespalib::string & rawIndex);
-
-private:
- FieldSearchSpecMapT _specMap; // mapping from field id to field search spec
- DocumentTypeIndexFieldMapT _documentTypeMap; // mapping from index name to field id list for each document type
- StringFieldIdTMap _nameIdMap; // mapping from field name to field id
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.cpp b/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.cpp
deleted file mode 100644
index 06b652d85e6..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "flattendocsumwriter.h"
-#include <vespa/document/fieldvalue/fieldvalues.h>
-
-namespace vsm {
-
-void
-FlattenDocsumWriter::considerSeparator()
-{
- if (_useSeparator) {
- _output.put(_separator.c_str(), _separator.size());
- }
-}
-
-void
-FlattenDocsumWriter::onPrimitive(uint32_t, const Content & c)
-{
- considerSeparator();
- const document::FieldValue & fv = c.getValue();
- if (fv.isLiteral()) {
- const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv);
- vespalib::stringref value = lfv.getValueRef();
- _output.put(value.data(), value.size());
- } else if (fv.isNumeric() ||
- fv.isA(document::FieldValue::Type::BOOL))
- {
- vespalib::string value = fv.getAsString();
- _output.put(value.data(), value.size());
- } else {
- vespalib::string value = fv.toString();
- _output.put(value.data(), value.size());
- }
- _useSeparator = true;
-}
-
-FlattenDocsumWriter::FlattenDocsumWriter(const vespalib::string & separator) :
- _output(32),
- _separator(separator),
- _useSeparator(false)
-{ }
-
-FlattenDocsumWriter::~FlattenDocsumWriter() = default;
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.h b/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.h
deleted file mode 100644
index 47c6f1e75d0..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <vespa/document/fieldvalue/fieldvalue.h>
-#include <vespa/document/fieldvalue/iteratorhandler.h>
-#include <vespa/vsm/common/charbuffer.h>
-
-namespace vsm {
-
-/**
- * This class is used to flatten out and write a complex field value.
- * A separator string is inserted between primitive field values.
- **/
-class FlattenDocsumWriter : public document::fieldvalue::IteratorHandler {
-private:
- CharBuffer _output;
- vespalib::string _separator;
- bool _useSeparator;
-
- void considerSeparator();
- void onPrimitive(uint32_t, const Content & c) override;
-
-public:
- FlattenDocsumWriter(const vespalib::string & separator = " ");
- ~FlattenDocsumWriter();
- void setSeparator(const vespalib::string & separator) { _separator = separator; }
- const CharBuffer & getResult() const { return _output; }
- void clear() {
- _output.reset();
- _separator = " ";
- _useSeparator = false;
- }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/i_matching_elements_filler.h b/streamingvisitors/src/vespa/vsm/vsm/i_matching_elements_filler.h
deleted file mode 100644
index a35cea40cec..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/i_matching_elements_filler.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <memory>
-
-namespace search {
-class MatchingElements;
-class MatchingElementsFields;
-}
-
-namespace vsm {
-
-/*
- * Interface class for filling matching elements structure for
- * streaming search.
- */
-class IMatchingElementsFiller {
-public:
- virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields& fields) = 0;
- virtual ~IMatchingElementsFiller() = default;
-};
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp b/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp
deleted file mode 100644
index 5bc5798fb9d..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "slimefieldwriter.h"
-#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
-#include <vespa/vespalib/stllike/asciistream.h>
-#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/searchsummary/docsummary/resultconfig.h>
-#include <vespa/document/datatype/positiondatatype.h>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.slimefieldwriter");
-
-namespace {
-
-vespalib::string
-toString(const vsm::FieldPath & fieldPath)
-{
- vespalib::asciistream oss;
- for (size_t i = 0; i < fieldPath.size(); ++i) {
- if (i > 0) {
- oss << ".";
- }
- oss << fieldPath[i].getName();
- }
- return oss.str();
-}
-
-vespalib::string
-toString(const std::vector<vespalib::string> & fieldPath)
-{
- vespalib::asciistream oss;
- for (size_t i = 0; i < fieldPath.size(); ++i) {
- if (i > 0) {
- oss << ".";
- }
- oss << fieldPath[i];
- }
- return oss.str();
-}
-
-} // namespace <unnamed>
-
-using namespace vespalib::slime::convenience;
-
-
-namespace vsm {
-
-void
-SlimeFieldWriter::traverseRecursive(const document::FieldValue & fv, Inserter &inserter)
-{
- LOG(debug, "traverseRecursive: class(%s), fieldValue(%s), currentPath(%s)",
- fv.className(), fv.toString().c_str(), toString(_currPath).c_str());
-
- if (fv.isCollection()) {
- const document::CollectionFieldValue & cfv = static_cast<const document::CollectionFieldValue &>(fv);
- if (cfv.isA(document::FieldValue::Type::ARRAY)) {
- const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(cfv);
- Cursor &a = inserter.insertArray();
- for (size_t i = 0; i < afv.size(); ++i) {
- const document::FieldValue & nfv = afv[i];
- ArrayInserter ai(a);
- traverseRecursive(nfv, ai);
- }
- } else {
- assert(cfv.isA(document::FieldValue::Type::WSET));
- const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(cfv);
- Cursor &a = inserter.insertArray();
- Symbol isym = a.resolve("item");
- Symbol wsym = a.resolve("weight");
- for (const auto &entry : wsfv) {
- Cursor &o = a.addObject();
- const document::FieldValue & nfv = *entry.first;
- ObjectSymbolInserter oi(o, isym);
- traverseRecursive(nfv, oi);
- int weight = static_cast<const document::IntFieldValue &>(*entry.second).getValue();
- o.setLong(wsym, weight);
- }
- }
- } else if (fv.isA(document::FieldValue::Type::MAP)) {
- const document::MapFieldValue & mfv = static_cast<const document::MapFieldValue &>(fv);
- Cursor &a = inserter.insertArray();
- Symbol keysym = a.resolve("key");
- Symbol valsym = a.resolve("value");
- for (const auto &entry : mfv) {
- Cursor &o = a.addObject();
- ObjectSymbolInserter ki(o, keysym);
- traverseRecursive(*entry.first, ki);
- _currPath.push_back("value");
- ObjectSymbolInserter vi(o, valsym);
- traverseRecursive(*entry.second, vi);
- _currPath.pop_back();
- }
- } else if (fv.isStructured()) {
- const document::StructuredFieldValue & sfv = static_cast<const document::StructuredFieldValue &>(fv);
- Cursor &o = inserter.insertObject();
- if (sfv.getDataType() == &document::PositionDataType::getInstance()
- && search::docsummary::ResultConfig::wantedV8geoPositions())
- {
- bool ok = true;
- try {
- int x = std::numeric_limits<int>::min();
- int y = std::numeric_limits<int>::min();
- for (const document::Field & entry : sfv) {
- document::FieldValue::UP fval(sfv.getValue(entry));
- if (entry.getName() == "x") {
- x = fval->getAsInt();
- } else if (entry.getName() == "y") {
- y = fval->getAsInt();
- } else {
- ok = false;
- }
- }
- if (x == std::numeric_limits<int>::min()) ok = false;
- if (y == std::numeric_limits<int>::min()) ok = false;
- if (ok) {
- o.setDouble("lat", double(y) / 1.0e6);
- o.setDouble("lng", double(x) / 1.0e6);
- return;
- }
- } catch (std::exception &e) {
- (void)e;
- // fallback to code below
- }
- }
- for (const document::Field & entry : sfv) {
- if (explorePath(entry.getName())) {
- _currPath.push_back(entry.getName());
- Memory keymem(entry.getName());
- ObjectInserter oi(o, keymem);
- document::FieldValue::UP fval(sfv.getValue(entry));
- traverseRecursive(*fval, oi);
- _currPath.pop_back();
- }
- }
- } else {
- if (fv.isLiteral()) {
- const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv);
- inserter.insertString(lfv.getValueRef());
- } else if (fv.isNumeric()) {
- switch (fv.getDataType()->getId()) {
- case document::DataType::T_BYTE:
- case document::DataType::T_SHORT:
- case document::DataType::T_INT:
- case document::DataType::T_LONG:
- inserter.insertLong(fv.getAsLong());
- break;
- case document::DataType::T_DOUBLE:
- inserter.insertDouble(fv.getAsDouble());
- break;
- case document::DataType::T_FLOAT:
- inserter.insertDouble(fv.getAsFloat());
- break;
- default:
- inserter.insertString(fv.getAsString());
- }
- } else if (fv.isA(document::FieldValue::Type::BOOL)) {
- const auto & bfv = static_cast<const document::BoolFieldValue &>(fv);
- inserter.insertBool(bfv.getValue());
- } else {
- inserter.insertString(fv.toString());
- }
- }
-}
-
-bool
-SlimeFieldWriter::explorePath(vespalib::stringref candidate)
-{
- if (_inputFields == nullptr) {
- return true;
- }
- // find out if we should explore the current path
- for (size_t i = 0; i < _inputFields->size(); ++i) {
- const FieldPath & fp = (*_inputFields)[i].getPath();
- if (_currPath.size() <= fp.size()) {
- bool equal = true;
- for (size_t j = 0; j < _currPath.size() && equal; ++j) {
- equal = (fp[j].getName() == _currPath[j]);
- }
- if (equal) {
- if (_currPath.size() == fp.size()) {
- return true;
- } else if (fp[_currPath.size()].getName() == candidate) {
- // the current path matches one of the input field paths
- return true;
- }
- }
- }
- }
- return false;
-}
-
-SlimeFieldWriter::SlimeFieldWriter() :
- _rbuf(4_Ki),
- _slime(),
- _inputFields(nullptr),
- _currPath()
-{
-}
-
-SlimeFieldWriter::~SlimeFieldWriter() = default;
-
-void
-SlimeFieldWriter::convert(const document::FieldValue & fv)
-{
- if (LOG_WOULD_LOG(debug)) {
- if (_inputFields != nullptr) {
- for (size_t i = 0; i < _inputFields->size(); ++i) {
- LOG(debug, "write: input field path [%zd] '%s'", i, toString((*_inputFields)[i].getPath()).c_str());
- }
- } else {
- LOG(debug, "write: no input fields");
- }
- }
- SlimeInserter inserter(_slime);
- traverseRecursive(fv, inserter);
- search::SlimeOutputRawBufAdapter adapter(_rbuf);
- vespalib::slime::BinaryFormat::encode(_slime, adapter);
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h b/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h
deleted file mode 100644
index b5adac8985f..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "docsumfieldspec.h"
-#include <vespa/vsm/common/storagedocument.h>
-#include <vespa/document/fieldvalue/fieldvalues.h>
-#include <vespa/vespalib/data/slime/slime.h>
-#include <vespa/searchlib/util/rawbuf.h>
-
-namespace vsm {
-
-/**
- * This class is used to write a field value as slime binary data.
- * If only a subset of the field value should be written this subset
- * is specified using the setInputFields() function.
- **/
-class SlimeFieldWriter
-{
-private:
- search::RawBuf _rbuf;
- vespalib::Slime _slime;
- const DocsumFieldSpec::FieldIdentifierVector * _inputFields;
- std::vector<vespalib::string> _currPath;
-
- void traverseRecursive(const document::FieldValue & fv, vespalib::slime::Inserter & inserter);
- bool explorePath(vespalib::stringref candidate);
-
-public:
- SlimeFieldWriter();
- ~SlimeFieldWriter();
-
-
- /**
- * Specifies the subset of the field value that should be written.
- **/
- void setInputFields(const DocsumFieldSpec::FieldIdentifierVector & inputFields) { _inputFields = &inputFields; }
-
- /**
- * Convert the given field value
- **/
- void convert(const document::FieldValue & fv);
-
- /**
- * Return a reference to the output binary data
- **/
- vespalib::stringref out() const {
- return vespalib::stringref(_rbuf.GetDrainPos(), _rbuf.GetUsedLen());
- }
-
- void clear() {
- _rbuf.Reuse();
- _inputFields = nullptr;
- _currPath.clear();
- }
-};
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.cpp b/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.cpp
deleted file mode 100644
index 127302311f9..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "snippetmodifier.h"
-#include <vespa/document/fieldvalue/stringfieldvalue.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.snippetmodifier");
-
-using namespace document;
-using search::streaming::QueryTerm;
-using search::streaming::QueryTermList;
-typedef vespalib::hash_map<vsm::FieldIdT, QueryTermList> FieldQueryTermMap;
-
-namespace {
-
-void
-addIfNotPresent(FieldQueryTermMap & map, vsm::FieldIdT fId, QueryTerm * qt)
-{
- FieldQueryTermMap::iterator itr = map.find(fId);
- if (itr != map.end()) {
- QueryTermList & qtl = itr->second;
- if (std::find(qtl.begin(), qtl.end(), qt) == qtl.end()) {
- qtl.push_back(qt);
- }
- } else {
- map[fId].push_back(qt);
- }
-}
-
-}
-
-namespace vsm {
-
-void
-SnippetModifier::considerSeparator()
-{
- if (_useSep) {
- _valueBuf->put(_groupSep);
- }
-}
-
-void
-SnippetModifier::onPrimitive(uint32_t, const Content & c)
-{
- considerSeparator();
- _searcher->onValue(c.getValue());
- _valueBuf->put(_searcher->getModifiedBuf().getBuffer(), _searcher->getModifiedBuf().getPos());
- _useSep = true;
-}
-
-void
-SnippetModifier::reset()
-{
- _valueBuf->reset();
- _useSep = false;
-}
-
-
-SnippetModifier::SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher) :
- _searcher(searcher),
- _valueBuf(new CharBuffer(32)),
- _groupSep('\x1E'),
- _useSep(false),
- _empty()
-{
-}
-
-SnippetModifier::SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher, const CharBuffer::SP & valueBuf) :
- _searcher(searcher),
- _valueBuf(valueBuf),
- _groupSep('\x1E'),
- _useSep(false),
- _empty()
-{
-}
-
-SnippetModifier::~SnippetModifier() {}
-
-FieldValue::UP
-SnippetModifier::modify(const FieldValue & fv, const document::FieldPath & path)
-{
- reset();
- fv.iterateNested(path, *this);
- return FieldValue::UP(new StringFieldValue(vespalib::string(_valueBuf->getBuffer(), _valueBuf->getPos())));
-}
-
-
-SnippetModifierManager::SnippetModifierManager() :
- _modifiers(),
- _searchBuf(new SearcherBuf(64)),
- _searchModifyBuf(new CharBuffer(64)),
- _searchOffsetBuf(new std::vector<size_t>(64)),
- _modifierBuf(new CharBuffer(128))
-{
-}
-
-SnippetModifierManager::~SnippetModifierManager() {}
-
-void
-SnippetModifierManager::setup(const QueryTermList & queryTerms,
- const FieldSearchSpecMapT & specMap,
- const IndexFieldMapT & indexMap)
-{
- FieldQueryTermMap fqtm;
-
- // setup modifiers
- for (QueryTermList::const_iterator i = queryTerms.begin(); i != queryTerms.end(); ++i) {
- QueryTerm * qt = *i;
- IndexFieldMapT::const_iterator j = indexMap.find(qt->index());
- if (j != indexMap.end()) {
- for (FieldIdTList::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
- FieldIdT fId = *k;
- const FieldSearchSpec & spec = specMap.find(fId)->second;
- if (spec.searcher().substring() || qt->isSubstring()) { // we need a modifier for this field id
- addIfNotPresent(fqtm, fId, qt);
- if (_modifiers.getModifier(fId) == NULL) {
- LOG(debug, "Create snippet modifier for field id '%u'", fId);
- UTF8SubstringSnippetModifier::SP searcher
- (new UTF8SubstringSnippetModifier(fId, _searchModifyBuf, _searchOffsetBuf));
- _modifiers.map()[fId] = std::make_unique<SnippetModifier>(searcher, _modifierBuf);
- }
- }
- }
- }
- }
-
- // prepare modifiers
- for (auto & entry : _modifiers.map()) {
- FieldIdT fId = entry.first;
- SnippetModifier & smod = static_cast<SnippetModifier &>(*entry.second);
- smod.getSearcher()->prepare(fqtm[fId], _searchBuf);
- }
-}
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.h b/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.h
deleted file mode 100644
index 4718ab8783a..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.h
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include "fieldsearchspec.h"
-#include <vespa/vsm/common/charbuffer.h>
-#include <vespa/vsm/common/document.h>
-#include <vespa/vsm/common/fieldmodifier.h>
-#include <vespa/vsm/searcher/utf8substringsnippetmodifier.h>
-#include <vespa/document/fieldvalue/fieldvalue.h>
-#include <vespa/document/fieldvalue/iteratorhandler.h>
-
-namespace vsm {
-
-/**
- * This class is responsible for modifying field values where we have substring search and that are used
- * as input to snippet generation.
- *
- * The class implements the FieldModifier interface to modify field values, and the IteratorHandler interface
- * to traverse complex field values. Primitive field values are passed to the underlying searcher that is
- * responsible for modifying the field value by inserting unit separators before and after matches.
- * A group separator is inserted between primitive field values the same way as done by FlattenDocsumWriter.
- **/
-class SnippetModifier : public FieldModifier, public document::fieldvalue::IteratorHandler
-{
-private:
- UTF8SubstringSnippetModifier::SP _searcher;
- CharBuffer::SP _valueBuf; // buffer to store the final modified field value
- char _groupSep;
- bool _useSep;
- document::FieldPath _empty;
-
- void considerSeparator();
- // Inherrit doc from document::FieldValue::IteratorHandler
- void onPrimitive(uint32_t, const Content & c) override;
- void reset();
-
-public:
- /**
- * Creates a new instance.
- *
- * @param searcher the searcher used to modify primitive field values.
- **/
- SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher);
-
- /**
- * Creates a new instance.
- *
- * @param searcher the searcher used to modify primitive field values.
- * @param valueBuf the shared buffer used to store the final modified field value.
- **/
- SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher, const CharBuffer::SP & valueBuf);
-
- ~SnippetModifier();
-
- /**
- * Modifies the complete given field value.
- **/
- document::FieldValue::UP modify(const document::FieldValue & fv) override {
- return modify(fv, _empty);
- }
-
- /**
- * Modifies the given field value by passing all primitive field values to the searcher and
- * inserting group separators between them. A string field value is returned.
- * The iterating of the field value is limited by the given field path.
- *
- * @param fv the field value to modify.
- * @param path the field path used to iterate the field value.
- * @return the new modified field value.
- **/
- document::FieldValue::UP modify(const document::FieldValue & fv,
- const document::FieldPath & path) override;
-
- const CharBuffer & getValueBuf() const { return *_valueBuf; }
- const UTF8SubstringSnippetModifier::SP & getSearcher() const { return _searcher; }
-};
-
-/**
- * This class manages a set of snippet modifiers.
- * The modifiers are instantiated and prepared in the setup function.
- * This class also holds shared buffers that are used by the modifiers.
- **/
-class SnippetModifierManager
-{
-private:
- FieldModifierMap _modifiers;
- SharedSearcherBuf _searchBuf;
- CharBuffer::SP _searchModifyBuf;
- SharedOffsetBuffer _searchOffsetBuf;
- CharBuffer::SP _modifierBuf;
-
-public:
- SnippetModifierManager();
- ~SnippetModifierManager();
-
- /**
- * Setups snippet modifiers for all fields where we have substring search.
- *
- * @param queryTerms the query terms to take into consideration.
- * @param specMap mapping from field id to search spec objects.
- * @param fieldMap mapping from index (used in the query) to a list of field ids.
- **/
- void setup(const search::streaming::QueryTermList & queryTerms,
- const FieldSearchSpecMapT & specMap, const IndexFieldMapT & fieldMap);
-
- const FieldModifierMap & getModifiers() const { return _modifiers; }
-};
-
-}
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp
deleted file mode 100644
index 5507532d4f3..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "vsm-adapter.hpp"
-#include "docsumconfig.h"
-#include "i_matching_elements_filler.h"
-#include <vespa/searchlib/common/matching_elements.h>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".vsm.vsm-adapter");
-
-using search::docsummary::ResConfigEntry;
-using search::docsummary::KeywordExtractor;
-using search::MatchingElements;
-using config::ConfigSnapshot;
-
-namespace vsm {
-
-GetDocsumsStateCallback::GetDocsumsStateCallback() :
- _summaryFeatures(),
- _rankFeatures(),
- _matching_elements_filler()
-{ }
-
-void GetDocsumsStateCallback::FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env)
-{
- (void) env;
- if (_summaryFeatures) { // set the summary features to write to the docsum
- state->_summaryFeatures = _summaryFeatures;
- state->_summaryFeaturesCached = true;
- }
-}
-
-void GetDocsumsStateCallback::FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env)
-{
- (void) env;
- if (_rankFeatures) { // set the rank features to write to the docsum
- state->_rankFeatures = _rankFeatures;
- }
-}
-
-void GetDocsumsStateCallback::FillDocumentLocations(GetDocsumsState *state, IDocsumEnvironment * env)
-{
- (void) state;
- (void) env;
-}
-
-std::unique_ptr<MatchingElements>
-GetDocsumsStateCallback::fill_matching_elements(const search::MatchingElementsFields& fields)
-{
- if (_matching_elements_filler) {
- return _matching_elements_filler->fill_matching_elements(fields);
- }
- return std::make_unique<MatchingElements>();
-}
-
-void
-GetDocsumsStateCallback::set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler)
-{
- _matching_elements_filler = std::move(matching_elements_filler);
-}
-
-GetDocsumsStateCallback::~GetDocsumsStateCallback() = default;
-
-DocsumTools::FieldSpec::FieldSpec() :
- _outputName(),
- _inputNames(),
- _command(VsmsummaryConfig::Fieldmap::Command::NONE)
-{ }
-
-DocsumTools::FieldSpec::~FieldSpec() = default;
-
-DocsumTools::DocsumTools(std::unique_ptr<DynamicDocsumWriter> writer) :
- _writer(std::move(writer)),
- _juniper(),
- _resultClass(),
- _fieldSpecs()
-{ }
-
-
-DocsumTools::~DocsumTools() = default;
-
-bool
-DocsumTools::obtainFieldNames(const FastS_VsmsummaryHandle &cfg)
-{
- uint32_t defaultSummaryId = getResultConfig()->LookupResultClassId(cfg->outputclass);
- _resultClass = getResultConfig()->LookupResultClass(defaultSummaryId);
- if (_resultClass != NULL) {
- for (uint32_t i = 0; i < _resultClass->GetNumEntries(); ++i) {
- const ResConfigEntry * entry = _resultClass->GetEntry(i);
- _fieldSpecs.push_back(FieldSpec());
- _fieldSpecs.back().setOutputName(entry->_bindname);
- bool found = false;
- if (cfg) {
- // check if we have this summary field in the vsmsummary config
- for (uint32_t j = 0; j < cfg->fieldmap.size() && !found; ++j) {
- if (entry->_bindname == cfg->fieldmap[j].summary.c_str()) {
- for (uint32_t k = 0; k < cfg->fieldmap[j].document.size(); ++k) {
- _fieldSpecs.back().getInputNames().push_back(cfg->fieldmap[j].document[k].field);
- }
- _fieldSpecs.back().setCommand(cfg->fieldmap[j].command);
- found = true;
- }
- }
- }
- if (!found) {
- // use yourself as input
- _fieldSpecs.back().getInputNames().push_back(entry->_bindname);
- }
- }
- } else {
- LOG(warning, "could not locate result class: '%s'", cfg->outputclass.c_str());
- }
- return true;
-}
-
-void
-VSMAdapter::configure(const VSMConfigSnapshot & snapshot)
-{
- std::lock_guard guard(_lock);
- LOG(debug, "(re-)configure VSM (docsum tools)");
-
- std::shared_ptr<SummaryConfig> summary(snapshot.getConfig<SummaryConfig>());
- std::shared_ptr<SummarymapConfig> summaryMap(snapshot.getConfig<SummarymapConfig>());
- std::shared_ptr<VsmsummaryConfig> vsmSummary(snapshot.getConfig<VsmsummaryConfig>());
- std::shared_ptr<JuniperrcConfig> juniperrc(snapshot.getConfig<JuniperrcConfig>());
-
- _fieldsCfg.set(snapshot.getConfig<VsmfieldsConfig>().release());
- _fieldsCfg.latch();
-
- LOG(debug, "configureFields(): Size of cfg fieldspec: %zd", _fieldsCfg.get()->fieldspec.size()); // UlfC: debugging
- LOG(debug, "configureFields(): Size of cfg documenttype: %zd", _fieldsCfg.get()->documenttype.size()); // UlfC: debugging
- LOG(debug, "configureSummary(): Size of cfg classes: %zd", summary->classes.size()); // UlfC: debugging
- LOG(debug, "configureSummaryMap(): Size of cfg override: %zd", summaryMap->override.size()); // UlfC: debugging
- LOG(debug, "configureVsmSummary(): Size of cfg fieldmap: %zd", vsmSummary->fieldmap.size()); // UlfC: debugging
- LOG(debug, "configureVsmSummary(): outputclass='%s'", vsmSummary->outputclass.c_str()); // UlfC: debugging
-
- // init result config
- std::unique_ptr<ResultConfig> resCfg(new ResultConfig());
- if ( ! resCfg->ReadConfig(*summary.get(), _configId.c_str())) {
- throw std::runtime_error("(re-)configuration of VSM (docsum tools) failed due to bad summary config");
- }
-
- // init keyword extractor
- auto kwExtractor = std::make_unique<KeywordExtractor>(nullptr);
- kwExtractor->AddLegalIndexSpec(_highlightindexes.c_str());
- vespalib::string spec = kwExtractor->GetLegalIndexSpec();
- LOG(debug, "index highlight spec: '%s'", spec.c_str());
-
- // create dynamic docsum writer
- auto writer = std::make_unique<DynamicDocsumWriter>(resCfg.release(), kwExtractor.release());
-
- // configure juniper (used when configuring DynamicDocsumConfig)
- _juniperProps = std::make_unique<JuniperProperties>(*juniperrc);
- auto juniper = std::make_unique<juniper::Juniper>(_juniperProps.get(), &_wordFolder);
-
- // create new docsum tools
- auto docsumTools = std::make_unique<DocsumTools>(std::move(writer));
- docsumTools->setJuniper(std::move(juniper));
-
- // configure dynamic docsum writer
- DynamicDocsumConfig dynDocsumConfig(docsumTools.get(), docsumTools->getDocsumWriter(), _fieldsCfg.get());
- dynDocsumConfig.configure(*summaryMap.get());
-
- // configure new docsum tools
- if (docsumTools->obtainFieldNames(vsmSummary)) {
- // latch new docsum tools into production
- _docsumTools.set(docsumTools.release());
- _docsumTools.latch();
- } else {
- throw std::runtime_error("(re-)configuration of VSM (docsum tools) failed");
- }
-}
-
-VSMConfigSnapshot::VSMConfigSnapshot(const vespalib::string & configId, const config::ConfigSnapshot & snapshot)
- : _configId(configId),
- _snapshot(std::make_unique<config::ConfigSnapshot>(snapshot))
-{ }
-VSMConfigSnapshot::~VSMConfigSnapshot() = default;
-
-VSMAdapter::VSMAdapter(const vespalib::string & highlightindexes, const vespalib::string & configId, Fast_WordFolder & wordFolder)
- : _highlightindexes(highlightindexes),
- _configId(configId),
- _wordFolder(wordFolder),
- _fieldsCfg(),
- _docsumTools(),
- _juniperProps(),
- _lock()
-{
-}
-
-
-VSMAdapter::~VSMAdapter() = default;
-
-}
diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h
deleted file mode 100644
index 6484269353b..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/searchlib/query/base.h>
-#include <vespa/vsm/config/vsm-cfif.h>
-#include <vespa/config-summary.h>
-#include <vespa/config-summarymap.h>
-#include <vespa/searchlib/common/featureset.h>
-#include <vespa/searchsummary/docsummary/docsumwriter.h>
-#include <vespa/searchsummary/docsummary/docsumstate.h>
-#include <vespa/searchsummary/docsummary/idocsumenvironment.h>
-#include <vespa/juniper/rpinterface.h>
-
-using search::docsummary::ResultConfig;
-using search::docsummary::ResultClass;
-using search::docsummary::IDocsumWriter;
-using search::docsummary::DynamicDocsumWriter;
-using search::docsummary::GetDocsumsState;
-using search::docsummary::IDocsumEnvironment;
-using search::docsummary::JuniperProperties;
-
-using vespa::config::search::SummaryConfig;
-using vespa::config::search::SummarymapConfig;
-using vespa::config::search::summary::JuniperrcConfig;
-
-namespace config { class ConfigSnapshot; }
-namespace vsm {
-
-class IMatchingElementsFiller;
-
-class GetDocsumsStateCallback : public search::docsummary::GetDocsumsStateCallback
-{
-private:
- search::FeatureSet::SP _summaryFeatures;
- search::FeatureSet::SP _rankFeatures;
- std::unique_ptr<IMatchingElementsFiller> _matching_elements_filler;
-
-public:
- GetDocsumsStateCallback();
- void FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env) override;
- void FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env) override;
- virtual void FillDocumentLocations(GetDocsumsState * state, IDocsumEnvironment * env);
- virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields& fields) override;
- void setSummaryFeatures(const search::FeatureSet::SP & sf) { _summaryFeatures = sf; }
- void setRankFeatures(const search::FeatureSet::SP & rf) { _rankFeatures = rf; }
- void set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler);
- ~GetDocsumsStateCallback();
-};
-
-class DocsumTools : public IDocsumEnvironment
-{
-public:
- class FieldSpec {
- private:
- vespalib::string _outputName;
- std::vector<vespalib::string> _inputNames;
- VsmsummaryConfig::Fieldmap::Command _command;
-
- public:
- FieldSpec();
- ~FieldSpec();
- const vespalib::string & getOutputName() const { return _outputName; }
- void setOutputName(const vespalib::string & name) { _outputName = name; }
- const std::vector<vespalib::string> & getInputNames() const { return _inputNames; }
- std::vector<vespalib::string> & getInputNames() { return _inputNames; }
- VsmsummaryConfig::Fieldmap::Command getCommand() const { return _command; }
- void setCommand(VsmsummaryConfig::Fieldmap::Command command) { _command = command; }
- };
-
-private:
- std::unique_ptr<DynamicDocsumWriter> _writer;
- std::unique_ptr<juniper::Juniper> _juniper;
- const ResultClass * _resultClass;
- std::vector<FieldSpec> _fieldSpecs;
- DocsumTools(const DocsumTools &);
- DocsumTools &operator=(const DocsumTools &);
-
-public:
- DocsumTools(std::unique_ptr<DynamicDocsumWriter> writer);
- ~DocsumTools();
- void setJuniper(std::unique_ptr<juniper::Juniper> juniper) { _juniper = std::move(juniper); }
- ResultConfig *getResultConfig() const { return _writer->GetResultConfig(); }
- DynamicDocsumWriter *getDocsumWriter() const { return _writer.get(); }
- const ResultClass *getResultClass() const { return _resultClass; }
- const std::vector<FieldSpec> & getFieldSpecs() const { return _fieldSpecs; }
- bool obtainFieldNames(const FastS_VsmsummaryHandle &cfg);
-
- // inherit doc from IDocsumEnvironment
- search::IAttributeManager * getAttributeManager() override { return NULL; }
- vespalib::string lookupIndex(const vespalib::string&) const override { return ""; }
- juniper::Juniper * getJuniper() override { return _juniper.get(); }
-};
-
-typedef std::shared_ptr<DocsumTools> DocsumToolsPtr;
-
-class VSMConfigSnapshot {
-private:
- const vespalib::string _configId;
- std::unique_ptr<const config::ConfigSnapshot> _snapshot;
-public:
- VSMConfigSnapshot(const vespalib::string & configId, const config::ConfigSnapshot & snapshot);
- ~VSMConfigSnapshot();
- template <typename ConfigType>
- std::unique_ptr<ConfigType> getConfig() const;
-};
-
-class VSMAdapter
-{
-public:
- VSMAdapter(const vespalib::string & highlightindexes, const vespalib::string & configId, Fast_WordFolder & wordFolder);
- virtual ~VSMAdapter();
-
- VsmfieldsHandle getFieldsConfig() const { return _fieldsCfg.get(); }
- DocsumToolsPtr getDocsumTools() const { return _docsumTools.get(); }
- void configure(const VSMConfigSnapshot & snapshot);
-private:
- vespalib::string _highlightindexes;
- const vespalib::string _configId;
- Fast_WordFolder & _wordFolder;
- vespalib::PtrHolder<VsmfieldsConfig> _fieldsCfg;
- vespalib::PtrHolder<DocsumTools> _docsumTools;
- std::unique_ptr<JuniperProperties> _juniperProps;
-
- std::mutex _lock;
-
- VSMAdapter(const VSMAdapter &);
- VSMAdapter &operator=(const VSMAdapter &);
-};
-
-} // namespace vsm
-
diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.hpp b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.hpp
deleted file mode 100644
index f071dbb2015..00000000000
--- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.hpp
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "vsm-adapter.h"
-#include <vespa/config/retriever/configsnapshot.hpp>
-
-namespace vsm {
-
-template <typename ConfigType>
-std::unique_ptr<ConfigType>
-VSMConfigSnapshot::getConfig() const
-{
- return _snapshot->getConfig<ConfigType>(_configId);
-}
-
-} // namespace vsm
-