diff options
18 files changed, 150 insertions, 158 deletions
diff --git a/client/go/go.mod b/client/go/go.mod index 89186abea2f..bf0e53a0f03 100644 --- a/client/go/go.mod +++ b/client/go/go.mod @@ -16,7 +16,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 github.com/zalando/go-keyring v0.2.3 - golang.org/x/net v0.16.0 + golang.org/x/net v0.17.0 golang.org/x/sys v0.13.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/client/go/go.sum b/client/go/go.sum index 9347b3500bf..87282411b18 100644 --- a/client/go/go.sum +++ b/client/go/go.sum @@ -72,6 +72,8 @@ golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.16.0 h1:7eBu7KsSvFDtSXUIDbh3aqlK4DPsZ1rByC8PFfBThos= golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210819135213-f52c844e1c1c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def b/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def index d9c32376512..c1c0944d7eb 100644 --- a/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def +++ b/container-core/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def @@ -131,7 +131,7 @@ http2Enabled bool default=true http2.streamIdleTimeout double default=600 -http2.maxConcurrentStreams int default=4096 +http2.maxConcurrentStreams int default=512 # Override the default server name when authority is missing from request. serverName.fallback string default="" diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java index fad70329136..287342f1290 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java @@ -18,6 +18,7 @@ import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.Optional; +import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -32,6 +33,8 @@ import static com.yahoo.vespa.hosted.controller.notification.Notification.Type; */ public class NotificationsDb { + private static final Logger log = Logger.getLogger(NotificationsDb.class.getName()); + private final Clock clock; private final CuratorDb curatorDb; private final Notifier notifier; @@ -78,7 +81,10 @@ public class NotificationsDb { notifications.add(notification); curatorDb.writeNotifications(source.tenant(), notifications); } - changed.ifPresent(notifier::dispatch); + changed.ifPresent(c -> { + log.fine(() -> "New notification %s".formatted(c)); + notifier.dispatch(c); + }); } /** Remove the notification with the given source and type */ @@ -156,9 +162,11 @@ public class NotificationsDb { private boolean notificationExists(Notification notification, List<Notification> existing, boolean mindHigherLevel) { // Be conservative for now, only dispatch notifications if they are from new source or with new type. // the message content and level is ignored for now - return existing.stream().anyMatch(e -> - notification.source().contains(e.source()) && notification.type().equals(e.type()) && + boolean exists = existing.stream() + .anyMatch(e -> notification.source().contains(e.source()) && notification.type().equals(e.type()) && (!mindHigherLevel || notification.level().ordinal() <= e.level().ordinal())); + log.fine(() -> "%s in %s == %b".formatted(notification, existing, exists)); + return exists; } private static Optional<Notification> createFeedBlockNotification(NotificationSource source, Instant at, ClusterMetrics metric) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java index 4d8906f6fe5..afb260bf765 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notifier.java @@ -8,7 +8,6 @@ import com.yahoo.restapi.UriBuilder; import com.yahoo.text.Text; import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.controller.api.integration.organization.Mail; import com.yahoo.vespa.hosted.controller.api.integration.organization.Mailer; @@ -99,6 +98,8 @@ public class Notifier { private void dispatch(Notification notification, Collection<TenantContacts.EmailContact> contacts) { try { + log.fine(() -> "Sending notification " + notification + " to " + + contacts.stream().map(c -> c.email().getEmailAddress()).toList()); var content = formatter.format(notification); mailer.send(mailOf(content, contacts.stream() .filter(c -> c.email().isVerified()) diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index 5aff4935bee..c015e9a9a33 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -113,7 +113,7 @@ <mimepull.vespa.version>1.10.0</mimepull.vespa.version> <mockito.vespa.version>5.6.0</mockito.vespa.version> <mojo-executor.vespa.version>2.4.0</mojo-executor.vespa.version> - <netty.vespa.version>4.1.99.Final</netty.vespa.version> + <netty.vespa.version>4.1.100.Final</netty.vespa.version> <netty-tcnative.vespa.version>2.0.62.Final</netty-tcnative.vespa.version> <onnxruntime.vespa.version>1.15.1</onnxruntime.vespa.version> <opennlp.vespa.version>2.3.0</opennlp.vespa.version> diff --git a/searchcore/src/tests/proton/docsummary/docsummary_test.cpp b/searchcore/src/tests/proton/docsummary/docsummary_test.cpp index 1fcb1b09d94..8264ec6b680 100644 --- a/searchcore/src/tests/proton/docsummary/docsummary_test.cpp +++ b/searchcore/src/tests/proton/docsummary/docsummary_test.cpp @@ -23,12 +23,12 @@ #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/tensor/tensor_attribute.h> #include <vespa/searchlib/test/doc_builder.h> +#include <vespa/searchlib/util/linguisticsannotation.h> #include <vespa/searchlib/transactionlog/nosyncproxy.h> #include <vespa/searchlib/transactionlog/translogserver.h> #include <vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h> #include <vespa/searchsummary/docsummary/i_docsum_store_document.h> #include <vespa/searchsummary/docsummary/i_juniper_converter.h> -#include <vespa/searchsummary/docsummary/linguisticsannotation.h> #include <vespa/config-bucketspaces.h> #include <vespa/config/helper/configgetter.hpp> #include <vespa/document/annotation/annotation.h> diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp index 042b57f0486..2a54859352d 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp @@ -2,12 +2,8 @@ #include "field_inverter.h" #include "ordered_field_index_inserter.h" -#include <vespa/document/annotation/alternatespanlist.h> #include <vespa/document/annotation/annotation.h> #include <vespa/document/annotation/span.h> -#include <vespa/document/annotation/spanlist.h> -#include <vespa/document/annotation/spantree.h> -#include <vespa/document/annotation/spantreevisitor.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/document.h> #include <vespa/document/fieldvalue/stringfieldvalue.h> @@ -30,9 +26,7 @@ LOG_SETUP(".searchlib.memoryindex.fieldinverter"); namespace search::memoryindex { -using document::AlternateSpanList; using document::Annotation; -using document::AnnotationType; using document::ArrayFieldValue; using document::DataType; using document::Document; @@ -40,79 +34,24 @@ using document::DocumentType; using document::Field; using document::FieldValue; using document::IntFieldValue; -using document::SimpleSpanList; using document::Span; -using document::SpanList; -using document::SpanNode; -using document::SpanTree; -using document::SpanTreeVisitor; using document::StringFieldValue; using document::StructFieldValue; using document::WeightedSetFieldValue; using index::DocIdAndPosOccFeatures; using index::Schema; using search::index::schema::CollectionType; +using search::linguistics::TokenExtractor; using search::util::URL; using vespalib::make_string; using vespalib::datastore::Aligner; -namespace documentinverterkludge::linguistics { - -const vespalib::string SPANTREE_NAME("linguistics"); - -} - -using namespace documentinverterkludge; - -namespace { - -class SpanFinder : public SpanTreeVisitor { -public: - int32_t begin_pos; - int32_t end_pos; - - SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {} - Span span() { return Span(begin_pos, end_pos - begin_pos); } - - void visit(const Span &node) override { - begin_pos = std::min(begin_pos, node.from()); - end_pos = std::max(end_pos, node.from() + node.length()); - } - void visit(const SpanList &node) override { - for (const auto & span_ : node) { - const_cast<SpanNode *>(span_)->accept(*this); - } - } - void visit(const SimpleSpanList &node) override { - for (const auto & span_ : node) { - const_cast<Span &>(span_).accept(*this); - } - } - void visit(const AlternateSpanList &node) override { - for (size_t i = 0; i < node.getNumSubtrees(); ++i) { - visit(node.getSubtree(i)); - } - } -}; - -Span -getSpan(const SpanNode &span_node) -{ - SpanFinder finder; - // The SpanNode will not be changed. - const_cast<SpanNode &>(span_node).accept(finder); - return finder.span(); -} - -} - void FieldInverter::processAnnotations(const StringFieldValue &value, const Document& doc) { _terms.clear(); - StringFieldValue::SpanTrees spanTrees = value.getSpanTrees(); - const SpanTree *tree = StringFieldValue::findTree(spanTrees, linguistics::SPANTREE_NAME); - if (tree == nullptr) { + auto span_trees = value.getSpanTrees(); + if (!TokenExtractor::extract(false, _terms, span_trees)) { /* This is wrong unless field is exact match */ const vespalib::string &text = value.getValue(); if (text.empty()) { @@ -126,19 +65,6 @@ FieldInverter::processAnnotations(const StringFieldValue &value, const Document& return; } const vespalib::string &text = value.getValue(); - for (const Annotation & annotation : *tree) { - const SpanNode *span = annotation.getSpanNode(); - if ((span != nullptr) && annotation.valid() && - (annotation.getType() == *AnnotationType::TERM)) - { - Span sp = getSpan(*span); - if (sp.length() != 0) { - _terms.push_back(std::make_pair(sp, - annotation.getFieldValue())); - } - } - } - std::sort(_terms.begin(), _terms.end()); auto it = _terms.begin(); auto ite = _terms.end(); uint32_t wordRef; diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h index 99830e623eb..23e3f9ddfd8 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h @@ -5,6 +5,7 @@ #include "i_field_index_remove_listener.h" #include <vespa/document/annotation/span.h> #include <vespa/searchlib/index/docidandfeatures.h> +#include <vespa/searchlib/util/token_extractor.h> #include <vespa/vespalib/stllike/allocator.h> #include <vespa/vespalib/stllike/hash_map.h> #include <limits> @@ -179,9 +180,8 @@ private: index::DocIdAndPosOccFeatures _features; UInt32Vector _wordRefs; - using SpanTerm = std::pair<document::Span, const document::FieldValue *>; - using SpanTermVector = std::vector<SpanTerm>; - SpanTermVector _terms; + using SpanTerm = linguistics::TokenExtractor::SpanTerm; + std::vector<SpanTerm> _terms; // Info about aborted and pending documents. std::vector<PositionRange> _abortedDocs; diff --git a/searchlib/src/vespa/searchlib/test/string_field_builder.cpp b/searchlib/src/vespa/searchlib/test/string_field_builder.cpp index e842b7b44d6..d81572d8913 100644 --- a/searchlib/src/vespa/searchlib/test/string_field_builder.cpp +++ b/searchlib/src/vespa/searchlib/test/string_field_builder.cpp @@ -7,6 +7,7 @@ #include <vespa/document/annotation/spanlist.h> #include <vespa/document/annotation/spantree.h> #include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/util/linguisticsannotation.h> #include <vespa/fastlib/text/unicodeutil.h> #include <vespa/vespalib/text/utf8.h> @@ -22,15 +23,10 @@ using document::SpanNode; using document::SpanTree; using vespalib::Utf8Reader; using vespalib::Utf8Writer; +using search::linguistics::SPANTREE_NAME; namespace search::test { -namespace { - -const vespalib::string SPANTREE_NAME("linguistics"); - -} - StringFieldBuilder::StringFieldBuilder(const DocBuilder& doc_builder) : _value(), _span_start(0u), diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt index 500b08da815..e9661b5e919 100644 --- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt @@ -13,10 +13,12 @@ vespa_add_library(searchlib_util OBJECT filesizecalculator.cpp fileutil.cpp foldedstringcompare.cpp + linguisticsannotation.cpp logutil.cpp rawbuf.cpp slime_output_raw_buf_adapter.cpp state_explorer_utils.cpp + token_extractor.cpp url.cpp DEPENDS ) diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp b/searchlib/src/vespa/searchlib/util/linguisticsannotation.cpp index c8aef561319..c8aef561319 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp +++ b/searchlib/src/vespa/searchlib/util/linguisticsannotation.cpp diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h b/searchlib/src/vespa/searchlib/util/linguisticsannotation.h index 83a19bed986..83a19bed986 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h +++ b/searchlib/src/vespa/searchlib/util/linguisticsannotation.h diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.cpp b/searchlib/src/vespa/searchlib/util/token_extractor.cpp new file mode 100644 index 00000000000..555ea86d299 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/token_extractor.cpp @@ -0,0 +1,85 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "token_extractor.h" +#include "linguisticsannotation.h" +#include <vespa/document/annotation/alternatespanlist.h> +#include <vespa/document/annotation/span.h> +#include <vespa/document/annotation/spanlist.h> +#include <vespa/document/annotation/spantreevisitor.h> + +using document::AlternateSpanList; +using document::Annotation; +using document::AnnotationType; +using document::SimpleSpanList; +using document::Span; +using document::SpanList; +using document::SpanNode; +using document::SpanTreeVisitor; +using document::StringFieldValue; + +namespace search::linguistics { + +namespace { + +class SpanFinder : public SpanTreeVisitor { +public: + int32_t begin_pos; + int32_t end_pos; + + SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {} + Span span() { return Span(begin_pos, end_pos - begin_pos); } + + void visit(const Span &node) override { + begin_pos = std::min(begin_pos, node.from()); + end_pos = std::max(end_pos, node.from() + node.length()); + } + void visit(const SpanList &node) override { + for (const auto & span_ : node) { + span_->accept(*this); + } + } + void visit(const SimpleSpanList &node) override { + for (const auto & span_ : node) { + span_.accept(*this); + } + } + void visit(const AlternateSpanList &node) override { + for (size_t i = 0; i < node.getNumSubtrees(); ++i) { + visit(node.getSubtree(i)); + } + } +}; + +Span +getSpan(const SpanNode &span_node) +{ + SpanFinder finder; + span_node.accept(finder); + return finder.span(); +} + +} + +bool +TokenExtractor::extract(bool allow_zero_length_tokens, std::vector<SpanTerm>& terms, const document::StringFieldValue::SpanTrees& trees) +{ + auto tree = StringFieldValue::findTree(trees, SPANTREE_NAME); + if (tree == nullptr) { + return false; + } + for (const Annotation & annotation : *tree) { + const SpanNode *span = annotation.getSpanNode(); + if ((span != nullptr) && annotation.valid() && + (annotation.getType() == *AnnotationType::TERM)) + { + Span sp = getSpan(*span); + if (sp.length() != 0 || allow_zero_length_tokens) { + terms.emplace_back(sp, annotation.getFieldValue()); + } + } + } + std::sort(terms.begin(), terms.end()); + return true; +} + +} diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.h b/searchlib/src/vespa/searchlib/util/token_extractor.h new file mode 100644 index 00000000000..5796aaa7482 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/token_extractor.h @@ -0,0 +1,27 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vector> + +namespace document { + +class FieldValue; +class StringFieldValue; +class Span; + +} + +namespace search::linguistics { + +/* + * Class used to extract tokens from annotated string field value. + */ +class TokenExtractor { +public: + using SpanTerm = std::pair<document::Span, const document::FieldValue*>; + static bool extract(bool allow_zero_length_tokens, std::vector<SpanTerm>& terms, const document::StringFieldValue::SpanTrees& trees); +}; + +} diff --git a/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp index 16aa8c70131..0a05e078382 100644 --- a/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp +++ b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp @@ -9,9 +9,9 @@ #include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/fixedtyperepo.h> #include <vespa/juniper/juniper_separators.h> +#include <vespa/searchlib/util/linguisticsannotation.h> #include <vespa/searchsummary/docsummary/annotation_converter.h> #include <vespa/searchsummary/docsummary/i_juniper_converter.h> -#include <vespa/searchsummary/docsummary/linguisticsannotation.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/stllike/asciistream.h> diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt index 9d61c61ef7a..32df047c27f 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt +++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt @@ -23,7 +23,6 @@ vespa_add_library(searchsummary_docsummary OBJECT juniper_dfw_term_visitor.cpp juniper_query_adapter.cpp juniperproperties.cpp - linguisticsannotation.cpp matched_elements_filter_dfw.cpp positionsdfw.cpp query_term_filter.cpp diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp index 251cad47922..b4f76d8e39f 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp @@ -2,29 +2,21 @@ #include "annotation_converter.h" #include "i_juniper_converter.h" -#include "linguisticsannotation.h" -#include <vespa/document/annotation/alternatespanlist.h> #include <vespa/document/annotation/annotation.h> -#include <vespa/document/annotation/spantree.h> -#include <vespa/document/annotation/spantreevisitor.h> -#include <vespa/document/datatype/annotationtype.h> +#include <vespa/document/annotation/span.h> #include <vespa/document/fieldvalue/stringfieldvalue.h> #include <vespa/juniper/juniper_separators.h> +#include <vespa/searchlib/util/linguisticsannotation.h> +#include <vespa/searchlib/util/token_extractor.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/exceptions.h> #include <utility> -using document::AlternateSpanList; using document::Annotation; -using document::AnnotationType; using document::FieldValue; -using document::SimpleSpanList; using document::Span; -using document::SpanList; -using document::SpanNode; -using document::SpanTree; -using document::SpanTreeVisitor; using document::StringFieldValue; +using search::linguistics::TokenExtractor; namespace search::docsummary { @@ -36,40 +28,6 @@ getSpanString(vespalib::stringref s, const Span &span) return {s.data() + span.from(), static_cast<size_t>(span.length())}; } -struct SpanFinder : SpanTreeVisitor { - int32_t begin_pos; - int32_t end_pos; - - SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {} - Span span() { return Span(begin_pos, end_pos - begin_pos); } - - void visit(const Span &node) override { - begin_pos = std::min(begin_pos, node.from()); - end_pos = std::max(end_pos, node.from() + node.length()); - } - void visit(const SpanList &node) override { - for (const auto & span_ : node) { - span_->accept(*this); - } - } - void visit(const SimpleSpanList &node) override { - for (const auto & span_ : node) { - span_.accept(*this); - } - } - void visit(const AlternateSpanList &node) override { - for (size_t i = 0; i < node.getNumSubtrees(); ++i) { - visit(node.getSubtree(i)); - } - } -}; - -Span getSpan(const SpanNode &span_node) { - SpanFinder finder; - span_node.accept(finder); - return finder.span(); -} - const StringFieldValue &ensureStringFieldValue(const FieldValue &value) __attribute__((noinline)); const StringFieldValue &ensureStringFieldValue(const FieldValue &value) { @@ -125,28 +83,16 @@ AnnotationConverter::annotateSpans(const document::Span& span, ForwardIt it, For void AnnotationConverter::handleIndexingTerms(const StringFieldValue& value) { - StringFieldValue::SpanTrees trees = value.getSpanTrees(); - const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME); - using SpanTerm = std::pair<Span, const FieldValue *>; - using SpanTermVector = std::vector<SpanTerm>; - if (!tree) { + using SpanTerm = TokenExtractor::SpanTerm; + std::vector<SpanTerm> terms; + auto span_trees = value.getSpanTrees(); + if (!TokenExtractor::extract(true, terms, span_trees)) { // Treat a string without annotations as a single span. SpanTerm str(Span(0, _text.size()), static_cast<const FieldValue*>(nullptr)); handleAnnotations(str.first, &str, &str + 1); return; } - SpanTermVector terms; - for (const Annotation& annotation : *tree) { - // For now, skip any composite spans. - const auto *span = dynamic_cast<const Span*>(annotation.getSpanNode()); - if ((span != nullptr) && annotation.valid() && - (annotation.getType() == *AnnotationType::TERM)) { - terms.push_back(std::make_pair(getSpan(*span), - annotation.getFieldValue())); - } - } - sort(terms.begin(), terms.end()); auto it = terms.begin(); auto ite = terms.end(); int32_t endPos = 0; |