about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2021-08-16 13:14:38 +0000
committer Håvard Pettersen <havardpe@oath.com> 2021-08-16 13:36:09 +0000
commit c6331f25f9b6912d649c4fbdd65476775a4b6192 (patch)
tree 3151f3bd7f20d2d78629c5f3d5984679786bff2a /searchlib
parent e2fd2769172926471e602190afabc87815d4063a (diff)
move FeatureNameExtractor
to make it available for use in vespa-eval-expr
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore 1
-rw-r--r-- searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt 8
-rw-r--r-- searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp 79
-rw-r--r-- searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp 86
-rw-r--r-- searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h 20
7 files changed, 4 insertions, 192 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index ea47dddb99b..16a35a9ce9f 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -209,7 +209,6 @@ vespa_define_module(
src/tests/queryeval/weak_and_scorers
src/tests/queryeval/weighted_set_term
src/tests/queryeval/wrappers
- src/tests/rankingexpression/feature_name_extractor
src/tests/rankingexpression/intrinsic_blueprint_adapter
src/tests/ranksetup
src/tests/ranksetup/verify_feature
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore
deleted file mode 100644
index 88c86c1720e..00000000000
--- a/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-searchlib_feature_name_extractor_test_app
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt
deleted file mode 100644
index a2e153c7527..00000000000
--- a/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_feature_name_extractor_test_app TEST
- SOURCES
- feature_name_extractor_test.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_feature_name_extractor_test_app COMMAND searchlib_feature_name_extractor_test_app)
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp
deleted file mode 100644
index 7b3683f75d5..00000000000
--- a/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
-#include <vespa/searchlib/features/rankingexpression/feature_name_extractor.h>
-
-using search::features::rankingexpression::FeatureNameExtractor;
-
-void verify_extract(const vespalib::string &input,
- const vespalib::string &expect_symbol,
- const vespalib::string &expect_after)
-{
- FeatureNameExtractor extractor;
- const char *pos_in = input.data();
- const char *end_in = input.data() + input.size();
- vespalib::string symbol_out;
- const char *pos_out = nullptr;
- extractor.extract_symbol(pos_in, end_in, pos_out, symbol_out);
- ASSERT_TRUE(pos_out != nullptr);
- vespalib::string after(pos_out, end_in);
- EXPECT_EQUAL(expect_symbol, symbol_out);
- EXPECT_EQUAL(expect_after, after);
-}
-
-TEST("require that basic names are extracted correctly") {
- TEST_DO(verify_extract("foo+", "foo", "+"));
- TEST_DO(verify_extract("foo.out+", "foo.out", "+"));
- TEST_DO(verify_extract("foo(p1,p2)+", "foo(p1,p2)", "+"));
- TEST_DO(verify_extract("foo(p1,p2).out+", "foo(p1,p2).out", "+"));
-}
-
-TEST("require that special characters are allowed in prefix and suffix") {
- TEST_DO(verify_extract("_@$+", "_@$", "+"));
- TEST_DO(verify_extract("_@$.$@_+", "_@$.$@_", "+"));
- TEST_DO(verify_extract("_@$(p1,p2)+", "_@$(p1,p2)", "+"));
- TEST_DO(verify_extract("_@$(p1,p2).$@_+", "_@$(p1,p2).$@_", "+"));
-}
-
-TEST("require that dot is only allowed in suffix") {
- TEST_DO(verify_extract("foo.bar+", "foo.bar", "+"));
- TEST_DO(verify_extract("foo.bar.out+", "foo.bar.out", "+"));
- TEST_DO(verify_extract("foo.bar(p1,p2)+", "foo.bar", "(p1,p2)+"));
- TEST_DO(verify_extract("foo.bar(p1,p2).out+", "foo.bar", "(p1,p2).out+"));
- TEST_DO(verify_extract("foo(p1,p2).out.bar+", "foo(p1,p2).out.bar", "+"));
-}
-
-TEST("require that parameters can be nested") {
- TEST_DO(verify_extract("foo(p1(a,b),p2(c,d(e,f))).out+", "foo(p1(a,b),p2(c,d(e,f))).out", "+"));
-}
-
-TEST("require that space is allowed among parameters") {
- TEST_DO(verify_extract("foo( p1 ( a , b ) ).out+", "foo( p1 ( a , b ) ).out", "+"));
-}
-
-TEST("require that space is now allowed outside parameters") {
- TEST_DO(verify_extract("foo +", "foo", " +"));
- TEST_DO(verify_extract("foo . out+", "foo", " . out+"));
- TEST_DO(verify_extract("foo. out+", "foo.", " out+"));
- TEST_DO(verify_extract("foo (p1,p2)+", "foo", " (p1,p2)+"));
- TEST_DO(verify_extract("foo(p1,p2) +", "foo(p1,p2)", " +"));
- TEST_DO(verify_extract("foo(p1,p2) .out+", "foo(p1,p2)", " .out+"));
- TEST_DO(verify_extract("foo(p1,p2).out +", "foo(p1,p2).out", " +"));
-}
-
-TEST("require that parameters can be scientific numbers") {
- TEST_DO(verify_extract("foo(1.3E+3,-1.9e-10).out+", "foo(1.3E+3,-1.9e-10).out", "+"));
-}
-
-TEST("require that quoted parenthesis are not counted") {
- TEST_DO(verify_extract("foo(a,b,\")\").out+", "foo(a,b,\")\").out", "+"));
-}
-
-TEST("require that escaped quotes does not unquote") {
- TEST_DO(verify_extract("foo(a,b,\"\\\")\").out+", "foo(a,b,\"\\\")\").out", "+"));
-}
-
-TEST("require that escaped escape does not hinder unquote") {
- TEST_DO(verify_extract("foo(a,b,\"\\\\\")\").out+", "foo(a,b,\"\\\\\")", "\").out+"));
-}
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt
index 68b4c4bb043..715fbb4446e 100644
--- a/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt
@@ -2,7 +2,6 @@
vespa_add_library(searchlib_features_rankingexpression OBJECT
SOURCES
expression_replacer.cpp
- feature_name_extractor.cpp
intrinsic_blueprint_adapter.cpp
intrinsic_expression.cpp
DEPENDS
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp
deleted file mode 100644
index 1987f476780..00000000000
--- a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "feature_name_extractor.h"
-
-namespace search {
-namespace features {
-namespace rankingexpression {
-
-namespace {
-
-struct LegalChar {
- bool legal[256];
- LegalChar(std::initializer_list<uint8_t> extra_chars) {
- for (int c = 0; c < 256; ++c) {
- legal[c] = isalnum(c);
- }
- for (uint8_t c: extra_chars) {
- legal[c] = true;
- }
- }
- bool is_legal(uint8_t c) { return legal[c]; }
-};
-
-static LegalChar prefix({'_', '$', '@'});
-static LegalChar suffix({'_', '.', '$', '@'});
-
-struct CountParen {
- size_t depth = 0;
- bool quoted = false;
- bool escaped = false;
- bool done(char c) {
- if (quoted) {
- if (escaped) {
- escaped = false;
- } else {
- if (c == '\\') {
- escaped = true;
- } else if (c == '"') {
- quoted = false;
- }
- }
- } else {
- if (c == '"') {
- quoted = true;
- } else if (c == '(') {
- ++depth;
- } else if (c == ')') {
- if (--depth == 0) {
- return true;
- }
- }
- }
- return false;
- }
-};
-
-} // namespace <unnamed>
-
-void
-FeatureNameExtractor::extract_symbol(const char *pos_in, const char *end_in,
- const char *&pos_out, vespalib::string &symbol_out) const
-{
- while ((pos_in < end_in) && prefix.is_legal(*pos_in)) {
- symbol_out.push_back(*pos_in++);
- }
- if ((pos_in < end_in) && (*pos_in == '(')) {
- CountParen paren;
- while (pos_in < end_in) {
- symbol_out.push_back(*pos_in);
- if (paren.done(*pos_in++)) {
- break;
- }
- }
- }
- if ((pos_in < end_in) && (*pos_in == '.')) {
- symbol_out.push_back(*pos_in++);
- while ((pos_in < end_in) && suffix.is_legal(*pos_in)) {
- symbol_out.push_back(*pos_in++);
- }
- }
- pos_out = pos_in;
-}
-
-} // namespace rankingexpression
-} // namespace features
-} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h
index b7d82744953..4d59e95e7a3 100644
--- a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h
@@ -2,22 +2,10 @@
#pragma once
-#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/feature_name_extractor.h>
-namespace search {
-namespace features {
-namespace rankingexpression {
+namespace search::features::rankingexpression {
-/**
- * Custom symbol extractor used to extract ranking feature names when
- * parsing ranking expressions.
- **/
-struct FeatureNameExtractor : public vespalib::eval::SymbolExtractor {
- void extract_symbol(const char *pos_in, const char *end_in,
- const char *&pos_out, vespalib::string &symbol_out) const override;
-};
-
-} // namespace rankingexpression
-} // namespace features
-} // namespace search
+using FeatureNameExtractor = vespalib::eval::FeatureNameExtractor;
+}