summary | refs | log | tree | commit | diff | stats
path: root/searchsummary
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2023-10-12 12:36:50 +0200
committer Tor Egge <Tor.Egge@online.no> 2023-10-12 12:36:50 +0200
commit 706bf2929c840606efba2763b177ae435579c1d7 (patch)
tree 45db7324ec136e87809135260f2a7491ca49150a /searchsummary
parent 686dc5941b174ffab2de1ee1da90402977947e64 (diff)
Move more checks to TokenExtractor.
Diffstat (limited to 'searchsummary')
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp 40
1 files changed, 13 insertions, 27 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
index b4f76d8e39f..bf267ab9e27 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
@@ -6,6 +6,7 @@
#include <vespa/document/annotation/span.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
#include <vespa/juniper/juniper_separators.h>
+#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/util/linguisticsannotation.h>
#include <vespa/searchlib/util/token_extractor.h>
#include <vespa/vespalib/stllike/asciistream.h>
@@ -17,6 +18,7 @@ using document::FieldValue;
using document::Span;
using document::StringFieldValue;
using search::linguistics::TokenExtractor;
+using search::memoryindex::FieldInverter;
namespace search::docsummary {
@@ -28,14 +30,7 @@ getSpanString(vespalib::stringref s, const Span &span)
return {s.data() + span.from(), static_cast<size_t>(span.length())};
}
-const StringFieldValue &ensureStringFieldValue(const FieldValue &value) __attribute__((noinline));
-
-const StringFieldValue &ensureStringFieldValue(const FieldValue &value) {
- if (!value.isA(FieldValue::Type::STRING)) {
- throw vespalib::IllegalArgumentException("Illegal field type. " + value.toString(), VESPA_STRLOC);
- }
- return static_cast<const StringFieldValue &>(value);
-}
+vespalib::string dummy_field_name;
}
@@ -53,7 +48,7 @@ template <typename ForwardIt>
void
AnnotationConverter::handleAnnotations(const document::Span& span, ForwardIt it, ForwardIt last) {
int annCnt = (last - it);
- if (annCnt > 1 || (annCnt == 1 && it->second)) {
+ if (annCnt > 1 || (annCnt == 1 && it->altered)) {
annotateSpans(span, it, last);
} else {
_out << getSpanString(_text, span) << juniper::separators::unit_separator_string;
@@ -67,11 +62,7 @@ AnnotationConverter::annotateSpans(const document::Span& span, ForwardIt it, For
<< (getSpanString(_text, span))
<< juniper::separators::interlinear_annotation_separator_string; // SEPARATOR
while (it != last) {
- if (it->second) {
- _out << ensureStringFieldValue(*it->second).getValue();
- } else {
- _out << getSpanString(_text, span);
- }
+ _out << it->word;
if (++it != last) {
_out << " ";
}
@@ -86,26 +77,21 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value)
using SpanTerm = TokenExtractor::SpanTerm;
std::vector<SpanTerm> terms;
auto span_trees = value.getSpanTrees();
- if (!TokenExtractor::extract(true, terms, span_trees)) {
- // Treat a string without annotations as a single span.
- SpanTerm str(Span(0, _text.size()),
- static_cast<const FieldValue*>(nullptr));
- handleAnnotations(str.first, &str, &str + 1);
- return;
- }
+ TokenExtractor token_extractor(dummy_field_name, FieldInverter::max_word_len);
+ token_extractor.extract(terms, span_trees, _text, nullptr);
auto it = terms.begin();
auto ite = terms.end();
int32_t endPos = 0;
for (; it != ite; ) {
auto it_begin = it;
- if (it_begin->first.from() > endPos) {
- Span tmpSpan(endPos, it_begin->first.from() - endPos);
+ if (it_begin->span.from() > endPos) {
+ Span tmpSpan(endPos, it_begin->span.from() - endPos);
handleAnnotations(tmpSpan, it, it);
- endPos = it_begin->first.from();
+ endPos = it_begin->span.from();
}
- for (; it != ite && it->first == it_begin->first; ++it);
- handleAnnotations(it_begin->first, it_begin, it);
- endPos = it_begin->first.from() + it_begin->first.length();
+ for (; it != ite && it->span == it_begin->span; ++it);
+ handleAnnotations(it_begin->span, it_begin, it);
+ endPos = it_begin->span.from() + it_begin->span.length();
}
int32_t wantEndPos = _text.size();
if (endPos < wantEndPos) {