diff options
author | Håvard Pettersen <havardpe@oath.com> | 2021-08-16 13:14:38 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2021-08-16 13:36:09 +0000 |
commit | c6331f25f9b6912d649c4fbdd65476775a4b6192 (patch) | |
tree | 3151f3bd7f20d2d78629c5f3d5984679786bff2a /searchlib | |
parent | e2fd2769172926471e602190afabc87815d4063a (diff) |
move FeatureNameExtractor
to make it available for use in vespa-eval-expr
Diffstat (limited to 'searchlib')
7 files changed, 4 insertions, 192 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index ea47dddb99b..16a35a9ce9f 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -209,7 +209,6 @@ vespa_define_module( src/tests/queryeval/weak_and_scorers src/tests/queryeval/weighted_set_term src/tests/queryeval/wrappers - src/tests/rankingexpression/feature_name_extractor src/tests/rankingexpression/intrinsic_blueprint_adapter src/tests/ranksetup src/tests/ranksetup/verify_feature diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore deleted file mode 100644 index 88c86c1720e..00000000000 --- a/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore +++ /dev/null @@ -1 +0,0 @@ -searchlib_feature_name_extractor_test_app diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt deleted file mode 100644 index a2e153c7527..00000000000 --- a/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_feature_name_extractor_test_app TEST - SOURCES - feature_name_extractor_test.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_feature_name_extractor_test_app COMMAND searchlib_feature_name_extractor_test_app) diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp deleted file mode 100644 index 7b3683f75d5..00000000000 --- a/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/test_kit.h> -#include <vespa/searchlib/features/rankingexpression/feature_name_extractor.h> - -using search::features::rankingexpression::FeatureNameExtractor; - -void verify_extract(const vespalib::string &input, - const vespalib::string &expect_symbol, - const vespalib::string &expect_after) -{ - FeatureNameExtractor extractor; - const char *pos_in = input.data(); - const char *end_in = input.data() + input.size(); - vespalib::string symbol_out; - const char *pos_out = nullptr; - extractor.extract_symbol(pos_in, end_in, pos_out, symbol_out); - ASSERT_TRUE(pos_out != nullptr); - vespalib::string after(pos_out, end_in); - EXPECT_EQUAL(expect_symbol, symbol_out); - EXPECT_EQUAL(expect_after, after); -} - -TEST("require that basic names are extracted correctly") { - TEST_DO(verify_extract("foo+", "foo", "+")); - TEST_DO(verify_extract("foo.out+", "foo.out", "+")); - TEST_DO(verify_extract("foo(p1,p2)+", "foo(p1,p2)", "+")); - TEST_DO(verify_extract("foo(p1,p2).out+", "foo(p1,p2).out", "+")); -} - -TEST("require that special characters are allowed in prefix and suffix") { - TEST_DO(verify_extract("_@$+", "_@$", "+")); - TEST_DO(verify_extract("_@$.$@_+", "_@$.$@_", "+")); - TEST_DO(verify_extract("_@$(p1,p2)+", "_@$(p1,p2)", "+")); - TEST_DO(verify_extract("_@$(p1,p2).$@_+", "_@$(p1,p2).$@_", "+")); -} - -TEST("require that dot is only allowed in suffix") { - TEST_DO(verify_extract("foo.bar+", "foo.bar", "+")); - TEST_DO(verify_extract("foo.bar.out+", "foo.bar.out", "+")); - TEST_DO(verify_extract("foo.bar(p1,p2)+", "foo.bar", "(p1,p2)+")); - TEST_DO(verify_extract("foo.bar(p1,p2).out+", "foo.bar", "(p1,p2).out+")); - TEST_DO(verify_extract("foo(p1,p2).out.bar+", "foo(p1,p2).out.bar", "+")); -} - -TEST("require that parameters can be nested") { - TEST_DO(verify_extract("foo(p1(a,b),p2(c,d(e,f))).out+", "foo(p1(a,b),p2(c,d(e,f))).out", "+")); -} - -TEST("require that space is allowed among parameters") { - TEST_DO(verify_extract("foo( p1 ( a , b ) ).out+", "foo( p1 ( a , b ) ).out", "+")); -} - -TEST("require that space is now allowed outside parameters") { - TEST_DO(verify_extract("foo +", "foo", " +")); - TEST_DO(verify_extract("foo . out+", "foo", " . out+")); - TEST_DO(verify_extract("foo. out+", "foo.", " out+")); - TEST_DO(verify_extract("foo (p1,p2)+", "foo", " (p1,p2)+")); - TEST_DO(verify_extract("foo(p1,p2) +", "foo(p1,p2)", " +")); - TEST_DO(verify_extract("foo(p1,p2) .out+", "foo(p1,p2)", " .out+")); - TEST_DO(verify_extract("foo(p1,p2).out +", "foo(p1,p2).out", " +")); -} - -TEST("require that parameters can be scientific numbers") { - TEST_DO(verify_extract("foo(1.3E+3,-1.9e-10).out+", "foo(1.3E+3,-1.9e-10).out", "+")); -} - -TEST("require that quoted parenthesis are not counted") { - TEST_DO(verify_extract("foo(a,b,\")\").out+", "foo(a,b,\")\").out", "+")); -} - -TEST("require that escaped quotes does not unquote") { - TEST_DO(verify_extract("foo(a,b,\"\\\")\").out+", "foo(a,b,\"\\\")\").out", "+")); -} - -TEST("require that escaped escape does not hinder unquote") { - TEST_DO(verify_extract("foo(a,b,\"\\\\\")\").out+", "foo(a,b,\"\\\\\")", "\").out+")); -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt index 68b4c4bb043..715fbb4446e 100644 --- a/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt @@ -2,7 +2,6 @@ vespa_add_library(searchlib_features_rankingexpression OBJECT SOURCES expression_replacer.cpp - feature_name_extractor.cpp intrinsic_blueprint_adapter.cpp intrinsic_expression.cpp DEPENDS diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp deleted file mode 100644 index 1987f476780..00000000000 --- a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "feature_name_extractor.h" - -namespace search { -namespace features { -namespace rankingexpression { - -namespace { - -struct LegalChar { - bool legal[256]; - LegalChar(std::initializer_list<uint8_t> extra_chars) { - for (int c = 0; c < 256; ++c) { - legal[c] = isalnum(c); - } - for (uint8_t c: extra_chars) { - legal[c] = true; - } - } - bool is_legal(uint8_t c) { return legal[c]; } -}; - -static LegalChar prefix({'_', '$', '@'}); -static LegalChar suffix({'_', '.', '$', '@'}); - -struct CountParen { - size_t depth = 0; - bool quoted = false; - bool escaped = false; - bool done(char c) { - if (quoted) { - if (escaped) { - escaped = false; - } else { - if (c == '\\') { - escaped = true; - } else if (c == '"') { - quoted = false; - } - } - } else { - if (c == '"') { - quoted = true; - } else if (c == '(') { - ++depth; - } else if (c == ')') { - if (--depth == 0) { - return true; - } - } - } - return false; - } -}; - -} // namespace <unnamed> - -void -FeatureNameExtractor::extract_symbol(const char *pos_in, const char *end_in, - const char *&pos_out, vespalib::string &symbol_out) const -{ - while ((pos_in < end_in) && prefix.is_legal(*pos_in)) { - symbol_out.push_back(*pos_in++); - } - if ((pos_in < end_in) && (*pos_in == '(')) { - CountParen paren; - while (pos_in < end_in) { - symbol_out.push_back(*pos_in); - if (paren.done(*pos_in++)) { - break; - } - } - } - if ((pos_in < end_in) && (*pos_in == '.')) { - symbol_out.push_back(*pos_in++); - while ((pos_in < end_in) && suffix.is_legal(*pos_in)) { - symbol_out.push_back(*pos_in++); - } - } - pos_out = pos_in; -} - -} // namespace rankingexpression -} // namespace features -} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h index b7d82744953..4d59e95e7a3 100644 --- a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h +++ b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h @@ -2,22 +2,10 @@ #pragma once -#include <vespa/eval/eval/function.h> +#include <vespa/eval/eval/feature_name_extractor.h> -namespace search { -namespace features { -namespace rankingexpression { +namespace search::features::rankingexpression { -/** - * Custom symbol extractor used to extract ranking feature names when - * parsing ranking expressions. - **/ -struct FeatureNameExtractor : public vespalib::eval::SymbolExtractor { - void extract_symbol(const char *pos_in, const char *end_in, - const char *&pos_out, vespalib::string &symbol_out) const override; -}; - -} // namespace rankingexpression -} // namespace features -} // namespace search +using FeatureNameExtractor = vespalib::eval::FeatureNameExtractor; +} |