diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-10-19 16:18:39 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-10-19 16:18:39 +0200 |
commit | f2cd66f364900826f61c1501e638a7fa2ee1f426 (patch) | |
tree | 31b6638eb756201301a748942e92273a11eae0cd /searchsummary/src | |
parent | b35b2fff2b856e642a2e4562e4aea79a8f185065 (diff) |
Rename linguistics-tokens to tokens.
Diffstat (limited to 'searchsummary/src')
11 files changed, 58 insertions, 58 deletions
diff --git a/searchsummary/src/tests/docsummary/linguistics_tokens_converter/CMakeLists.txt b/searchsummary/src/tests/docsummary/linguistics_tokens_converter/CMakeLists.txt deleted file mode 100644 index d9510c3a2b3..00000000000 --- a/searchsummary/src/tests/docsummary/linguistics_tokens_converter/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchsummary_linguistics_tokens_converter_test_app TEST - SOURCES - linguistics_tokens_converter_test.cpp - DEPENDS - searchsummary - GTest::gtest -) - -vespa_add_test(NAME searchsummary_linguistics_tokens_converter_test_app COMMAND searchsummary_linguistics_tokens_converter_test_app) diff --git a/searchsummary/src/tests/docsummary/tokens_converter/CMakeLists.txt b/searchsummary/src/tests/docsummary/tokens_converter/CMakeLists.txt new file mode 100644 index 00000000000..68885a74b1b --- /dev/null +++ b/searchsummary/src/tests/docsummary/tokens_converter/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchsummary_tokens_converter_test_app TEST + SOURCES + tokens_converter_test.cpp + DEPENDS + searchsummary + GTest::gtest +) + +vespa_add_test(NAME searchsummary_tokens_converter_test_app COMMAND searchsummary_tokens_converter_test_app) diff --git a/searchsummary/src/tests/docsummary/linguistics_tokens_converter/linguistics_tokens_converter_test.cpp b/searchsummary/src/tests/docsummary/tokens_converter/tokens_converter_test.cpp index beaa43c7af8..493cbe0ecba 100644 --- a/searchsummary/src/tests/docsummary/linguistics_tokens_converter/linguistics_tokens_converter_test.cpp +++ b/searchsummary/src/tests/docsummary/tokens_converter/tokens_converter_test.cpp @@ -10,7 +10,7 @@ #include <vespa/document/repo/fixedtyperepo.h> #include <vespa/searchlib/util/linguisticsannotation.h> #include <vespa/searchlib/util/token_extractor.h> -#include <vespa/searchsummary/docsummary/linguistics_tokens_converter.h> +#include <vespa/searchsummary/docsummary/tokens_converter.h> #include <vespa/vespalib/data/simple_buffer.h> #include <vespa/vespalib/data/slime/json_format.h> #include <vespa/vespalib/data/slime/slime.h> @@ -24,7 +24,7 @@ using document::Span; using document::SpanList; using document::SpanTree; using document::StringFieldValue; -using search::docsummary::LinguisticsTokensConverter; +using search::docsummary::TokensConverter; using search::linguistics::SPANTREE_NAME; using search::linguistics::TokenExtractor; using vespalib::SimpleBuffer; @@ -55,7 +55,7 @@ get_document_types_config() } -class LinguisticsTokensConverterTest : public testing::Test +class TokensConverterTest : public testing::Test { protected: std::shared_ptr<const DocumentTypeRepo> _repo; @@ -64,8 +64,8 @@ protected: vespalib::string _dummy_field_name; TokenExtractor _token_extractor; - LinguisticsTokensConverterTest(); - ~LinguisticsTokensConverterTest() override; + TokensConverterTest(); + ~TokensConverterTest() override; void set_span_tree(StringFieldValue& value, std::unique_ptr<SpanTree> tree); StringFieldValue make_annotated_string(bool alt_tokens); StringFieldValue make_annotated_chinese_string(); @@ -73,7 +73,7 @@ protected: vespalib::string convert(const StringFieldValue& fv); }; -LinguisticsTokensConverterTest::LinguisticsTokensConverterTest() +TokensConverterTest::TokensConverterTest() : testing::Test(), _repo(std::make_unique<DocumentTypeRepo>(get_document_types_config())), _document_type(_repo->getDocumentType("indexingdocument")), @@ -83,10 +83,10 @@ LinguisticsTokensConverterTest::LinguisticsTokensConverterTest() { } -LinguisticsTokensConverterTest::~LinguisticsTokensConverterTest() = default; +TokensConverterTest::~TokensConverterTest() = default; void -LinguisticsTokensConverterTest::set_span_tree(StringFieldValue & value, std::unique_ptr<SpanTree> tree) +TokensConverterTest::set_span_tree(StringFieldValue & value, std::unique_ptr<SpanTree> tree) { StringFieldValue::SpanTrees trees; trees.push_back(std::move(tree)); @@ -94,7 +94,7 @@ LinguisticsTokensConverterTest::set_span_tree(StringFieldValue & value, std::uni } StringFieldValue -LinguisticsTokensConverterTest::make_annotated_string(bool alt_tokens) +TokensConverterTest::make_annotated_string(bool alt_tokens) { auto span_list_up = std::make_unique<SpanList>(); auto span_list = span_list_up.get(); @@ -111,7 +111,7 @@ LinguisticsTokensConverterTest::make_annotated_string(bool alt_tokens) } StringFieldValue -LinguisticsTokensConverterTest::make_annotated_chinese_string() +TokensConverterTest::make_annotated_chinese_string() { auto span_list_up = std::make_unique<SpanList>(); auto span_list = span_list_up.get(); @@ -125,50 +125,50 @@ LinguisticsTokensConverterTest::make_annotated_chinese_string() } vespalib::string -LinguisticsTokensConverterTest::make_exp_annotated_chinese_string_tokens() +TokensConverterTest::make_exp_annotated_chinese_string_tokens() { return R"(["我就是那个","大灰狼"])"; } vespalib::string -LinguisticsTokensConverterTest::convert(const StringFieldValue& fv) +TokensConverterTest::convert(const StringFieldValue& fv) { - LinguisticsTokensConverter converter(_token_extractor); + TokensConverter converter(_token_extractor); Slime slime; SlimeInserter inserter(slime); converter.convert(fv, inserter); return slime_to_string(slime); } -TEST_F(LinguisticsTokensConverterTest, convert_empty_string) +TEST_F(TokensConverterTest, convert_empty_string) { vespalib::string exp(R"([])"); StringFieldValue plain_string(""); EXPECT_EQ(exp, convert(plain_string)); } -TEST_F(LinguisticsTokensConverterTest, convert_plain_string) +TEST_F(TokensConverterTest, convert_plain_string) { vespalib::string exp(R"(["Foo Bar Baz"])"); StringFieldValue plain_string("Foo Bar Baz"); EXPECT_EQ(exp, convert(plain_string)); } -TEST_F(LinguisticsTokensConverterTest, convert_annotated_string) +TEST_F(TokensConverterTest, convert_annotated_string) { vespalib::string exp(R"(["foo","baz"])"); auto annotated_string = make_annotated_string(false); EXPECT_EQ(exp, convert(annotated_string)); } -TEST_F(LinguisticsTokensConverterTest, convert_annotated_string_with_alternatives) +TEST_F(TokensConverterTest, convert_annotated_string_with_alternatives) { vespalib::string exp(R"(["foo",["bar","baz"]])"); auto annotated_string = make_annotated_string(true); EXPECT_EQ(exp, convert(annotated_string)); } -TEST_F(LinguisticsTokensConverterTest, convert_annotated_chinese_string) +TEST_F(TokensConverterTest, convert_annotated_chinese_string) { auto exp = make_exp_annotated_chinese_string_tokens(); auto annotated_chinese_string = make_annotated_chinese_string(); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt index 57b6004fb61..0287517f830 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt +++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt @@ -23,8 +23,6 @@ vespa_add_library(searchsummary_docsummary OBJECT juniper_dfw_term_visitor.cpp juniper_query_adapter.cpp juniperproperties.cpp - linguistics_tokens_converter.cpp - linguistics_tokens_dfw.cpp matched_elements_filter_dfw.cpp positionsdfw.cpp query_term_filter.cpp @@ -39,4 +37,6 @@ vespa_add_library(searchsummary_docsummary OBJECT struct_fields_resolver.cpp struct_map_attribute_combiner_dfw.cpp summaryfeaturesdfw.cpp + tokens_converter.cpp + tokens_dfw.cpp ) diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp index c4823f6beeb..2ac5d1babbf 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp @@ -12,12 +12,12 @@ const vespalib::string documentid("documentid"); const vespalib::string dynamic_teaser("dynamicteaser"); const vespalib::string empty("empty"); const vespalib::string geo_position("geopos"); -const vespalib::string linguistics_tokens("linguistics-tokens"); const vespalib::string matched_attribute_elements_filter("matchedattributeelementsfilter"); const vespalib::string matched_elements_filter("matchedelementsfilter"); const vespalib::string positions("positions"); const vespalib::string rank_features("rankfeatures"); const vespalib::string summary_features("summaryfeatures"); +const vespalib::string tokens("tokens"); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h index 2d0b8c23855..d53351d8b04 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h @@ -18,11 +18,11 @@ extern const vespalib::string documentid; extern const vespalib::string dynamic_teaser; extern const vespalib::string empty; extern const vespalib::string geo_position; -extern const vespalib::string linguistics_tokens; extern const vespalib::string matched_attribute_elements_filter; extern const vespalib::string matched_elements_filter; extern const vespalib::string positions; extern const vespalib::string rank_features; extern const vespalib::string summary_features; +extern const vespalib::string tokens; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp index d19d2994104..2f7d9acdb65 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp @@ -9,11 +9,11 @@ #include "geoposdfw.h" #include "idocsumenvironment.h" #include "juniperdfw.h" -#include "linguistics_tokens_dfw.h" #include "matched_elements_filter_dfw.h" #include "positionsdfw.h" #include "rankfeaturesdfw.h" #include "summaryfeaturesdfw.h" +#include "tokens_dfw.h" #include <vespa/searchlib/common/matching_elements_fields.h> #include <vespa/vespalib/util/exceptions.h> @@ -85,9 +85,9 @@ DocsumFieldWriterFactory::create_docsum_field_writer(const vespalib::string& fie } else { throw_missing_source(command); } - } else if (command == command::linguistics_tokens) { + } else if (command == command::tokens) { if (!source.empty()) { - fieldWriter = std::make_unique<LinguisticsTokensDFW>(source); + fieldWriter = std::make_unique<TokensDFW>(source); } else { throw_missing_source(command); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_converter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/tokens_converter.cpp index b9b9d7c4c97..e2849fe793e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_converter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/tokens_converter.cpp @@ -1,6 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "linguistics_tokens_converter.h" +#include "tokens_converter.h" #include <vespa/document/fieldvalue/stringfieldvalue.h> #include <vespa/searchlib/util/token_extractor.h> #include <vespa/vespalib/data/slime/slime.h> @@ -14,18 +14,18 @@ using vespalib::slime::Inserter; namespace search::docsummary { -LinguisticsTokensConverter::LinguisticsTokensConverter(const TokenExtractor& token_extractor) +TokensConverter::TokensConverter(const TokenExtractor& token_extractor) : IStringFieldConverter(), _token_extractor(token_extractor), _text() { } -LinguisticsTokensConverter::~LinguisticsTokensConverter() = default; +TokensConverter::~TokensConverter() = default; template <typename ForwardIt> void -LinguisticsTokensConverter::handle_alternative_index_terms(ForwardIt it, ForwardIt last, Inserter& inserter) +TokensConverter::handle_alternative_index_terms(ForwardIt it, ForwardIt last, Inserter& inserter) { Cursor& a = inserter.insertArray(); ArrayInserter ai(a); @@ -35,13 +35,13 @@ LinguisticsTokensConverter::handle_alternative_index_terms(ForwardIt it, Forward } void -LinguisticsTokensConverter::handle_index_term(vespalib::stringref word, Inserter& inserter) +TokensConverter::handle_index_term(vespalib::stringref word, Inserter& inserter) { inserter.insertString(Memory(word)); } void -LinguisticsTokensConverter::handle_indexing_terms(const StringFieldValue& value, vespalib::slime::Inserter& inserter) +TokensConverter::handle_indexing_terms(const StringFieldValue& value, vespalib::slime::Inserter& inserter) { Cursor& a = inserter.insertArray(); ArrayInserter ai(a); @@ -63,14 +63,14 @@ LinguisticsTokensConverter::handle_indexing_terms(const StringFieldValue& value, } void -LinguisticsTokensConverter::convert(const StringFieldValue &input, vespalib::slime::Inserter& inserter) +TokensConverter::convert(const StringFieldValue &input, vespalib::slime::Inserter& inserter) { _text = input.getValueRef(); handle_indexing_terms(input, inserter); } bool -LinguisticsTokensConverter::render_weighted_set_as_array() const +TokensConverter::render_weighted_set_as_array() const { return true; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_converter.h b/searchsummary/src/vespa/searchsummary/docsummary/tokens_converter.h index d752fe89ed9..1798abac203 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_converter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/tokens_converter.h @@ -10,10 +10,10 @@ namespace search::docsummary { /* * Class converting a string field value with annotations into an array - * containing the index terms. Multiple index terms at same position are + * containing the tokens. Multiple tokens at same position are * placed in a nested array. */ -class LinguisticsTokensConverter : public IStringFieldConverter +class TokensConverter : public IStringFieldConverter { const linguistics::TokenExtractor& _token_extractor; vespalib::stringref _text; @@ -23,8 +23,8 @@ class LinguisticsTokensConverter : public IStringFieldConverter void handle_index_term(vespalib::stringref word, vespalib::slime::Inserter& inserter); void handle_indexing_terms(const document::StringFieldValue& value, vespalib::slime::Inserter& inserter); public: - LinguisticsTokensConverter(const linguistics::TokenExtractor& token_extractor); - ~LinguisticsTokensConverter() override; + TokensConverter(const linguistics::TokenExtractor& token_extractor); + ~TokensConverter() override; void convert(const document::StringFieldValue &input, vespalib::slime::Inserter& inserter) override; bool render_weighted_set_as_array() const override; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/tokens_dfw.cpp index 5e94e270c53..0741e5cc352 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/tokens_dfw.cpp @@ -1,34 +1,34 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "linguistics_tokens_dfw.h" +#include "tokens_dfw.h" #include "i_docsum_store_document.h" -#include "linguistics_tokens_converter.h" +#include "tokens_converter.h" #include <vespa/searchlib/memoryindex/field_inverter.h> using search::memoryindex::FieldInverter; namespace search::docsummary { -LinguisticsTokensDFW::LinguisticsTokensDFW(const vespalib::string& input_field_name) +TokensDFW::TokensDFW(const vespalib::string& input_field_name) : DocsumFieldWriter(), _input_field_name(input_field_name), _token_extractor(_input_field_name, FieldInverter::max_word_len) { } -LinguisticsTokensDFW::~LinguisticsTokensDFW() = default; +TokensDFW::~TokensDFW() = default; bool -LinguisticsTokensDFW::isGenerated() const +TokensDFW::isGenerated() const { return false; } void -LinguisticsTokensDFW::insertField(uint32_t, const IDocsumStoreDocument* doc, GetDocsumsState&, vespalib::slime::Inserter& target) const +TokensDFW::insertField(uint32_t, const IDocsumStoreDocument* doc, GetDocsumsState&, vespalib::slime::Inserter& target) const { if (doc != nullptr) { - LinguisticsTokensConverter converter(_token_extractor); + TokensConverter converter(_token_extractor); doc->insert_summary_field(_input_field_name, target, &converter); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/tokens_dfw.h index 9c6955b322e..e9f91ab683a 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/linguistics_tokens_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/tokens_dfw.h @@ -10,17 +10,17 @@ namespace search::docsummary { /* * Class for writing annotated string field values from document as - * arrays containing the indexing terms. + * arrays containing the tokens. */ -class LinguisticsTokensDFW : public DocsumFieldWriter +class TokensDFW : public DocsumFieldWriter { private: vespalib::string _input_field_name; linguistics::TokenExtractor _token_extractor; public: - explicit LinguisticsTokensDFW(const vespalib::string& input_field_name); - ~LinguisticsTokensDFW() override; + explicit TokensDFW(const vespalib::string& input_field_name); + ~TokensDFW() override; bool isGenerated() const override; void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, vespalib::slime::Inserter& target) const override; }; |