summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- client/go/go.mod 2
-rw-r--r-- client/go/go.sum 2
-rw-r--r-- container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def 2
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java 14
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java 3
-rw-r--r-- dependency-versions/pom.xml 2
-rw-r--r-- searchcore/src/tests/proton/docsummary/docsummary_test.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp 80
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_inverter.h 6
-rw-r--r-- searchlib/src/vespa/searchlib/test/string_field_builder.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/util/CMakeLists.txt 2
-rw-r--r-- searchlib/src/vespa/searchlib/util/linguisticsannotation.cpp (renamed from searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp) 0
-rw-r--r-- searchlib/src/vespa/searchlib/util/linguisticsannotation.h (renamed from searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h) 0
-rw-r--r-- searchlib/src/vespa/searchlib/util/token_extractor.cpp 85
-rw-r--r-- searchlib/src/vespa/searchlib/util/token_extractor.h 27
-rw-r--r-- searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp 2
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt 1
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp 70
18 files changed, 150 insertions, 158 deletions
diff --git a/client/go/go.mod b/client/go/go.mod
index 89186abea2f..bf0e53a0f03 100644
--- a/client/go/go.mod
+++ b/client/go/go.mod
@@ -16,7 +16,7 @@ require (
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.4
github.com/zalando/go-keyring v0.2.3
- golang.org/x/net v0.16.0
+ golang.org/x/net v0.17.0
golang.org/x/sys v0.13.0
gopkg.in/yaml.v3 v3.0.1
)
diff --git a/client/go/go.sum b/client/go/go.sum
index 9347b3500bf..87282411b18 100644
--- a/client/go/go.sum
+++ b/client/go/go.sum
@@ -72,6 +72,8 @@ golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.16.0 h1:7eBu7KsSvFDtSXUIDbh3aqlK4DPsZ1rByC8PFfBThos=
golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
+golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210819135213-f52c844e1c1c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
diff --git a/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def b/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def
index d9c32376512..c1c0944d7eb 100644
--- a/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def
+++ b/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def
@@ -131,7 +131,7 @@ http2Enabled bool default=true
http2.streamIdleTimeout double default=600
-http2.maxConcurrentStreams int default=4096
+http2.maxConcurrentStreams int default=512
# Override the default server name when authority is missing from request.
serverName.fallback string default=""
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java
index fad70329136..287342f1290 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java
@@ -18,6 +18,7 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
+import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -32,6 +33,8 @@ import static com.yahoo.vespa.hosted.controller.notification.Notification.Type;
*/
public class NotificationsDb {
+ private static final Logger log = Logger.getLogger(NotificationsDb.class.getName());
+
private final Clock clock;
private final CuratorDb curatorDb;
private final Notifier notifier;
@@ -78,7 +81,10 @@ public class NotificationsDb {
notifications.add(notification);
curatorDb.writeNotifications(source.tenant(), notifications);
}
- changed.ifPresent(notifier::dispatch);
+ changed.ifPresent(c -> {
+ log.fine(() -> "New notification %s".formatted(c));
+ notifier.dispatch(c);
+ });
}
/** Remove the notification with the given source and type */
@@ -156,9 +162,11 @@ public class NotificationsDb {
private boolean notificationExists(Notification notification, List<Notification> existing, boolean mindHigherLevel) {
// Be conservative for now, only dispatch notifications if they are from new source or with new type.
// the message content and level is ignored for now
- return existing.stream().anyMatch(e ->
- notification.source().contains(e.source()) && notification.type().equals(e.type()) &&
+ boolean exists = existing.stream()
+ .anyMatch(e -> notification.source().contains(e.source()) && notification.type().equals(e.type()) &&
(!mindHigherLevel || notification.level().ordinal() <= e.level().ordinal()));
+ log.fine(() -> "%s in %s == %b".formatted(notification, existing, exists));
+ return exists;
}
private static Optional<Notification> createFeedBlockNotification(NotificationSource source, Instant at, ClusterMetrics metric) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java
index 4d8906f6fe5..afb260bf765 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java
@@ -8,7 +8,6 @@ import com.yahoo.restapi.UriBuilder;
import com.yahoo.text.Text;
import com.yahoo.vespa.flags.FetchVector;
import com.yahoo.vespa.flags.FlagSource;
-import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.hosted.controller.api.integration.organization.Mail;
import com.yahoo.vespa.hosted.controller.api.integration.organization.Mailer;
@@ -99,6 +98,8 @@ public class Notifier {
private void dispatch(Notification notification, Collection<TenantContacts.EmailContact> contacts) {
try {
+ log.fine(() -> "Sending notification " + notification + " to " +
+ contacts.stream().map(c -> c.email().getEmailAddress()).toList());
var content = formatter.format(notification);
mailer.send(mailOf(content, contacts.stream()
.filter(c -> c.email().isVerified())
diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml
index 5aff4935bee..c015e9a9a33 100644
--- a/dependency-versions/pom.xml
+++ b/dependency-versions/pom.xml
@@ -113,7 +113,7 @@
<mimepull.vespa.version>1.10.0</mimepull.vespa.version>
<mockito.vespa.version>5.6.0</mockito.vespa.version>
<mojo-executor.vespa.version>2.4.0</mojo-executor.vespa.version>
- <netty.vespa.version>4.1.99.Final</netty.vespa.version>
+ <netty.vespa.version>4.1.100.Final</netty.vespa.version>
<netty-tcnative.vespa.version>2.0.62.Final</netty-tcnative.vespa.version>
<onnxruntime.vespa.version>1.15.1</onnxruntime.vespa.version>
<opennlp.vespa.version>2.3.0</opennlp.vespa.version>
diff --git a/searchcore/src/tests/proton/docsummary/docsummary_test.cpp b/searchcore/src/tests/proton/docsummary/docsummary_test.cpp
index 1fcb1b09d94..8264ec6b680 100644
--- a/searchcore/src/tests/proton/docsummary/docsummary_test.cpp
+++ b/searchcore/src/tests/proton/docsummary/docsummary_test.cpp
@@ -23,12 +23,12 @@
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/tensor/tensor_attribute.h>
#include <vespa/searchlib/test/doc_builder.h>
+#include <vespa/searchlib/util/linguisticsannotation.h>
#include <vespa/searchlib/transactionlog/nosyncproxy.h>
#include <vespa/searchlib/transactionlog/translogserver.h>
#include <vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h>
#include <vespa/searchsummary/docsummary/i_docsum_store_document.h>
#include <vespa/searchsummary/docsummary/i_juniper_converter.h>
-#include <vespa/searchsummary/docsummary/linguisticsannotation.h>
#include <vespa/config-bucketspaces.h>
#include <vespa/config/helper/configgetter.hpp>
#include <vespa/document/annotation/annotation.h>
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
index 042b57f0486..2a54859352d 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
@@ -2,12 +2,8 @@
#include "field_inverter.h"
#include "ordered_field_index_inserter.h"
-#include <vespa/document/annotation/alternatespanlist.h>
#include <vespa/document/annotation/annotation.h>
#include <vespa/document/annotation/span.h>
-#include <vespa/document/annotation/spanlist.h>
-#include <vespa/document/annotation/spantree.h>
-#include <vespa/document/annotation/spantreevisitor.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
@@ -30,9 +26,7 @@ LOG_SETUP(".searchlib.memoryindex.fieldinverter");
namespace search::memoryindex {
-using document::AlternateSpanList;
using document::Annotation;
-using document::AnnotationType;
using document::ArrayFieldValue;
using document::DataType;
using document::Document;
@@ -40,79 +34,24 @@ using document::DocumentType;
using document::Field;
using document::FieldValue;
using document::IntFieldValue;
-using document::SimpleSpanList;
using document::Span;
-using document::SpanList;
-using document::SpanNode;
-using document::SpanTree;
-using document::SpanTreeVisitor;
using document::StringFieldValue;
using document::StructFieldValue;
using document::WeightedSetFieldValue;
using index::DocIdAndPosOccFeatures;
using index::Schema;
using search::index::schema::CollectionType;
+using search::linguistics::TokenExtractor;
using search::util::URL;
using vespalib::make_string;
using vespalib::datastore::Aligner;
-namespace documentinverterkludge::linguistics {
-
-const vespalib::string SPANTREE_NAME("linguistics");
-
-}
-
-using namespace documentinverterkludge;
-
-namespace {
-
-class SpanFinder : public SpanTreeVisitor {
-public:
- int32_t begin_pos;
- int32_t end_pos;
-
- SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {}
- Span span() { return Span(begin_pos, end_pos - begin_pos); }
-
- void visit(const Span &node) override {
- begin_pos = std::min(begin_pos, node.from());
- end_pos = std::max(end_pos, node.from() + node.length());
- }
- void visit(const SpanList &node) override {
- for (const auto & span_ : node) {
- const_cast<SpanNode *>(span_)->accept(*this);
- }
- }
- void visit(const SimpleSpanList &node) override {
- for (const auto & span_ : node) {
- const_cast<Span &>(span_).accept(*this);
- }
- }
- void visit(const AlternateSpanList &node) override {
- for (size_t i = 0; i < node.getNumSubtrees(); ++i) {
- visit(node.getSubtree(i));
- }
- }
-};
-
-Span
-getSpan(const SpanNode &span_node)
-{
- SpanFinder finder;
- // The SpanNode will not be changed.
- const_cast<SpanNode &>(span_node).accept(finder);
- return finder.span();
-}
-
-}
-
void
FieldInverter::processAnnotations(const StringFieldValue &value, const Document& doc)
{
_terms.clear();
- StringFieldValue::SpanTrees spanTrees = value.getSpanTrees();
- const SpanTree *tree = StringFieldValue::findTree(spanTrees, linguistics::SPANTREE_NAME);
- if (tree == nullptr) {
+ auto span_trees = value.getSpanTrees();
+ if (!TokenExtractor::extract(false, _terms, span_trees)) {
/* This is wrong unless field is exact match */
const vespalib::string &text = value.getValue();
if (text.empty()) {
@@ -126,19 +65,6 @@ FieldInverter::processAnnotations(const StringFieldValue &value, const Document&
return;
}
const vespalib::string &text = value.getValue();
- for (const Annotation & annotation : *tree) {
- const SpanNode *span = annotation.getSpanNode();
- if ((span != nullptr) && annotation.valid() &&
- (annotation.getType() == *AnnotationType::TERM))
- {
- Span sp = getSpan(*span);
- if (sp.length() != 0) {
- _terms.push_back(std::make_pair(sp,
- annotation.getFieldValue()));
- }
- }
- }
- std::sort(_terms.begin(), _terms.end());
auto it = _terms.begin();
auto ite = _terms.end();
uint32_t wordRef;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
index 99830e623eb..23e3f9ddfd8 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
@@ -5,6 +5,7 @@
#include "i_field_index_remove_listener.h"
#include <vespa/document/annotation/span.h>
#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/util/token_extractor.h>
#include <vespa/vespalib/stllike/allocator.h>
#include <vespa/vespalib/stllike/hash_map.h>
#include <limits>
@@ -179,9 +180,8 @@ private:
index::DocIdAndPosOccFeatures _features;
UInt32Vector _wordRefs;
- using SpanTerm = std::pair<document::Span, const document::FieldValue *>;
- using SpanTermVector = std::vector<SpanTerm>;
- SpanTermVector _terms;
+ using SpanTerm = linguistics::TokenExtractor::SpanTerm;
+ std::vector<SpanTerm> _terms;
// Info about aborted and pending documents.
std::vector<PositionRange> _abortedDocs;
diff --git a/searchlib/src/vespa/searchlib/test/string_field_builder.cpp b/searchlib/src/vespa/searchlib/test/string_field_builder.cpp
index e842b7b44d6..d81572d8913 100644
--- a/searchlib/src/vespa/searchlib/test/string_field_builder.cpp
+++ b/searchlib/src/vespa/searchlib/test/string_field_builder.cpp
@@ -7,6 +7,7 @@
#include <vespa/document/annotation/spanlist.h>
#include <vespa/document/annotation/spantree.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/util/linguisticsannotation.h>
#include <vespa/fastlib/text/unicodeutil.h>
#include <vespa/vespalib/text/utf8.h>
@@ -22,15 +23,10 @@ using document::SpanNode;
using document::SpanTree;
using vespalib::Utf8Reader;
using vespalib::Utf8Writer;
+using search::linguistics::SPANTREE_NAME;
namespace search::test {
-namespace {
-
-const vespalib::string SPANTREE_NAME("linguistics");
-
-}
-
StringFieldBuilder::StringFieldBuilder(const DocBuilder& doc_builder)
: _value(),
_span_start(0u),
diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt
index 500b08da815..e9661b5e919 100644
--- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt
@@ -13,10 +13,12 @@ vespa_add_library(searchlib_util OBJECT
filesizecalculator.cpp
fileutil.cpp
foldedstringcompare.cpp
+ linguisticsannotation.cpp
logutil.cpp
rawbuf.cpp
slime_output_raw_buf_adapter.cpp
state_explorer_utils.cpp
+ token_extractor.cpp
url.cpp
DEPENDS
)
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp b/searchlib/src/vespa/searchlib/util/linguisticsannotation.cpp
index c8aef561319..c8aef561319 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp
+++ b/searchlib/src/vespa/searchlib/util/linguisticsannotation.cpp
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h b/searchlib/src/vespa/searchlib/util/linguisticsannotation.h
index 83a19bed986..83a19bed986 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h
+++ b/searchlib/src/vespa/searchlib/util/linguisticsannotation.h
diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.cpp b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
new file mode 100644
index 00000000000..555ea86d299
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
@@ -0,0 +1,85 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "token_extractor.h"
+#include "linguisticsannotation.h"
+#include <vespa/document/annotation/alternatespanlist.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantreevisitor.h>
+
+using document::AlternateSpanList;
+using document::Annotation;
+using document::AnnotationType;
+using document::SimpleSpanList;
+using document::Span;
+using document::SpanList;
+using document::SpanNode;
+using document::SpanTreeVisitor;
+using document::StringFieldValue;
+
+namespace search::linguistics {
+
+namespace {
+/*
+ * Span tree visitor that accumulates the bounding range of every Span it
+ * visits: begin_pos ends up as the smallest from() seen and end_pos as the
+ * largest from() + length() seen. span() turns that range into a single Span
+ * starting at begin_pos with length end_pos - begin_pos.
+ *
+ * NOTE(review): if no Span is ever visited, span() returns a Span starting at
+ * 0x7fffffff with a negative length - confirm callers cannot reach that case.
+ */
+class SpanFinder : public SpanTreeVisitor {
+public:
+ int32_t begin_pos; // smallest start position seen so far
+ int32_t end_pos; // largest end position (from + length) seen so far
+
+ SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {} // sentinel start/end, meant to be overwritten by the first min/max update
+ Span span() { return Span(begin_pos, end_pos - begin_pos); }
+
+ void visit(const Span &node) override { // leaf: widen the accumulated range
+ begin_pos = std::min(begin_pos, node.from());
+ end_pos = std::max(end_pos, node.from() + node.length());
+ }
+ void visit(const SpanList &node) override { // let every child node accept this visitor
+ for (const auto & span_ : node) {
+ span_->accept(*this);
+ }
+ }
+ void visit(const SimpleSpanList &node) override { // same as above, but the elements are accessed by value/reference
+ for (const auto & span_ : node) {
+ span_.accept(*this);
+ }
+ }
+ void visit(const AlternateSpanList &node) override { // every subtree contributes to the same accumulated range
+ for (size_t i = 0; i < node.getNumSubtrees(); ++i) {
+ visit(node.getSubtree(i));
+ }
+ }
+};
+/*
+ * Runs a fresh SpanFinder over span_node and returns the Span it reports,
+ * i.e. the range from the smallest start to the largest end among the spans
+ * the finder visited.
+ */
+Span
+getSpan(const SpanNode &span_node)
+{
+ SpanFinder finder;
+ span_node.accept(finder);
+ return finder.span();
+}
+
+}
+/*
+ * Collects the TERM annotations of the span tree named SPANTREE_NAME.
+ *
+ * allow_zero_length_tokens: when false, annotations whose computed span has
+ *                           length 0 are skipped; when true they are kept.
+ * terms:                    output vector; one (span, field value pointer)
+ *                           entry is appended per accepted annotation. The
+ *                           vector is not cleared here, so existing entries
+ *                           are kept and take part in the final sort.
+ * trees:                    span trees to search for SPANTREE_NAME.
+ *
+ * Returns false, leaving terms untouched, when no tree with that name is
+ * found. Otherwise returns true after sorting terms with the default ordering
+ * of SpanTerm.
+ */
+bool
+TokenExtractor::extract(bool allow_zero_length_tokens, std::vector<SpanTerm>& terms, const document::StringFieldValue::SpanTrees& trees)
+{
+ auto tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
+ if (tree == nullptr) {
+ return false; // no such tree: terms is left untouched
+ }
+ for (const Annotation & annotation : *tree) {
+ const SpanNode *span = annotation.getSpanNode();
+ if ((span != nullptr) && annotation.valid() && // only valid annotations that have a span node ...
+ (annotation.getType() == *AnnotationType::TERM)) // ... and are of type TERM
+ {
+ Span sp = getSpan(*span); // composite span nodes are reduced to one bounding span
+ if (sp.length() != 0 || allow_zero_length_tokens) {
+ terms.emplace_back(sp, annotation.getFieldValue());
+ }
+ }
+ }
+ std::sort(terms.begin(), terms.end()); // sorts the whole vector, including entries present on entry
+ return true;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.h b/searchlib/src/vespa/searchlib/util/token_extractor.h
new file mode 100644
index 00000000000..5796aaa7482
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/token_extractor.h
@@ -0,0 +1,27 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vector>
+
+namespace document {
+
+class FieldValue;
+class StringFieldValue;
+class Span;
+
+}
+
+namespace search::linguistics {
+
+/*
+ * Class used to extract tokens from annotated string field value.
+ *
+ * SpanTerm pairs the span of a token with the pointer returned by the
+ * annotation's getFieldValue().
+ * NOTE(review): that pointer presumably stays valid only as long as the span
+ * trees passed to extract() are alive - confirm against the callers.
+ *
+ * extract() appends one SpanTerm to terms for each valid TERM annotation that
+ * has a span node in the span tree named SPANTREE_NAME, then sorts terms. It
+ * returns false (and leaves terms untouched) when that tree is not present in
+ * trees, true otherwise. Zero-length spans are only kept when
+ * allow_zero_length_tokens is true.
+ */
+class TokenExtractor {
+public:
+ using SpanTerm = std::pair<document::Span, const document::FieldValue*>; // (token span, annotation field value)
+ static bool extract(bool allow_zero_length_tokens, std::vector<SpanTerm>& terms, const document::StringFieldValue::SpanTrees& trees);
+};
+
+}
diff --git a/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp
index 16aa8c70131..0a05e078382 100644
--- a/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp
+++ b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp
@@ -9,9 +9,9 @@
#include <vespa/document/repo/configbuilder.h>
#include <vespa/document/repo/fixedtyperepo.h>
#include <vespa/juniper/juniper_separators.h>
+#include <vespa/searchlib/util/linguisticsannotation.h>
#include <vespa/searchsummary/docsummary/annotation_converter.h>
#include <vespa/searchsummary/docsummary/i_juniper_converter.h>
-#include <vespa/searchsummary/docsummary/linguisticsannotation.h>
#include <vespa/vespalib/data/slime/slime.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/stllike/asciistream.h>
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
index 9d61c61ef7a..32df047c27f 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
+++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
@@ -23,7 +23,6 @@ vespa_add_library(searchsummary_docsummary OBJECT
juniper_dfw_term_visitor.cpp
juniper_query_adapter.cpp
juniperproperties.cpp
- linguisticsannotation.cpp
matched_elements_filter_dfw.cpp
positionsdfw.cpp
query_term_filter.cpp
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
index 251cad47922..b4f76d8e39f 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
@@ -2,29 +2,21 @@
#include "annotation_converter.h"
#include "i_juniper_converter.h"
-#include "linguisticsannotation.h"
-#include <vespa/document/annotation/alternatespanlist.h>
#include <vespa/document/annotation/annotation.h>
-#include <vespa/document/annotation/spantree.h>
-#include <vespa/document/annotation/spantreevisitor.h>
-#include <vespa/document/datatype/annotationtype.h>
+#include <vespa/document/annotation/span.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
#include <vespa/juniper/juniper_separators.h>
+#include <vespa/searchlib/util/linguisticsannotation.h>
+#include <vespa/searchlib/util/token_extractor.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/exceptions.h>
#include <utility>
-using document::AlternateSpanList;
using document::Annotation;
-using document::AnnotationType;
using document::FieldValue;
-using document::SimpleSpanList;
using document::Span;
-using document::SpanList;
-using document::SpanNode;
-using document::SpanTree;
-using document::SpanTreeVisitor;
using document::StringFieldValue;
+using search::linguistics::TokenExtractor;
namespace search::docsummary {
@@ -36,40 +28,6 @@ getSpanString(vespalib::stringref s, const Span &span)
return {s.data() + span.from(), static_cast<size_t>(span.length())};
}
-struct SpanFinder : SpanTreeVisitor {
- int32_t begin_pos;
- int32_t end_pos;
-
- SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {}
- Span span() { return Span(begin_pos, end_pos - begin_pos); }
-
- void visit(const Span &node) override {
- begin_pos = std::min(begin_pos, node.from());
- end_pos = std::max(end_pos, node.from() + node.length());
- }
- void visit(const SpanList &node) override {
- for (const auto & span_ : node) {
- span_->accept(*this);
- }
- }
- void visit(const SimpleSpanList &node) override {
- for (const auto & span_ : node) {
- span_.accept(*this);
- }
- }
- void visit(const AlternateSpanList &node) override {
- for (size_t i = 0; i < node.getNumSubtrees(); ++i) {
- visit(node.getSubtree(i));
- }
- }
-};
-
-Span getSpan(const SpanNode &span_node) {
- SpanFinder finder;
- span_node.accept(finder);
- return finder.span();
-}
-
const StringFieldValue &ensureStringFieldValue(const FieldValue &value) __attribute__((noinline));
const StringFieldValue &ensureStringFieldValue(const FieldValue &value) {
@@ -125,28 +83,16 @@ AnnotationConverter::annotateSpans(const document::Span& span, ForwardIt it, For
void
AnnotationConverter::handleIndexingTerms(const StringFieldValue& value)
{
- StringFieldValue::SpanTrees trees = value.getSpanTrees();
- const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME);
- using SpanTerm = std::pair<Span, const FieldValue *>;
- using SpanTermVector = std::vector<SpanTerm>;
- if (!tree) {
+ using SpanTerm = TokenExtractor::SpanTerm;
+ std::vector<SpanTerm> terms;
+ auto span_trees = value.getSpanTrees();
+ if (!TokenExtractor::extract(true, terms, span_trees)) {
// Treat a string without annotations as a single span.
SpanTerm str(Span(0, _text.size()),
static_cast<const FieldValue*>(nullptr));
handleAnnotations(str.first, &str, &str + 1);
return;
}
- SpanTermVector terms;
- for (const Annotation& annotation : *tree) {
- // For now, skip any composite spans.
- const auto *span = dynamic_cast<const Span*>(annotation.getSpanNode());
- if ((span != nullptr) && annotation.valid() &&
- (annotation.getType() == *AnnotationType::TERM)) {
- terms.push_back(std::make_pair(getSpan(*span),
- annotation.getFieldValue()));
- }
- }
- sort(terms.begin(), terms.end());
auto it = terms.begin();
auto ite = terms.end();
int32_t endPos = 0;