Use the simple tokenizer, no need to pull in boost

author: Henning Baldersheim <balder@yahoo-inc.com> 2023-02-03 21:35:21 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2023-02-03 21:35:21 +0000
commit: 402bdf03a13360af51116bd10919e200952dafd0 (patch)
tree: 4eb09cea7b60915789867c07e041f4d584d5a5c0
parent: 972491cd956d6cbc6bf718c5319bcd973997dc17 (diff)
6 files changed, 59 insertions, 50 deletions
diff --git a/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp b/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp
index 50c48fd63f2..b39e9358a00 100644
--- a/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp
@@ -8,8 +8,7 @@
 #include <vespa/vespalib/geo/zcurve.h>
 #include <vespa/vespalib/util/issue.h>
 #include <vespa/vespalib/util/stash.h>
-#include <boost/algorithm/string/split.hpp>
-#include <boost/algorithm/string/classification.hpp>
+#include <vespa/vespalib/text/stringtokenizer.h>
 #include <cmath>
 
 #include <vespa/log/log.h>
@@ -50,9 +49,9 @@ DistanceToPathExecutor::execute(uint32_t docId)
             double len = std::sqrt(len2);
 
             // For each document location, do
-            for (uint32_t loc = 0; loc < _intBuf.size(); ++loc) {
+            for (long loc : _intBuf) {
                 int32_t x = 0, y = 0;
-                vespalib::geo::ZCurve::decode(_intBuf[loc], &x, &y);
+                vespalib::geo::ZCurve::decode(loc, &x, &y);
 
                 double u = 0, dx, dy;
                 if (len < 1e-6) {
@@ -132,17 +131,16 @@ DistanceToPathBlueprint::createExecutor(const search::fef::IQueryEnvironment &en
     std::vector<Vector2> path;
     search::fef::Property pro = env.getProperties().lookup(getName(), "path");
     if (pro.found()) {
-        vespalib::string str = pro.getAt(0);
+        vespalib::stringref str = pro.getAt(0);
         uint32_t len = str.size();
         if (str[0] == '(' && len > 1 && str[len - 1] == ')') {
             str = str.substr(1, len - 1); // remove braces
-            std::vector<vespalib::string> arr;
-            boost::split(arr, str, boost::is_any_of(","));
-            len = arr.size() - 1;
+            vespalib::StringTokenizer tokenizer(str);
+            len = tokenizer.size() - 1;
             for (uint32_t i = 0; i < len; i += 2) {
-                double x = util::strToNum<double>(arr[i]);
-                double y = util::strToNum<double>(arr[i + 1]);
-                path.push_back(Vector2(x, y));
+                auto x = util::strToNum<double>(tokenizer[i]);
+                auto y = util::strToNum<double>(tokenizer[i + 1]);
+                path.emplace_back(x, y);
             }
         }
     }
diff --git a/searchlib/src/vespa/searchlib/features/distancetopathfeature.h b/searchlib/src/vespa/searchlib/features/distancetopathfeature.h
index 2cce7c33899..4930e0653d4 100644
--- a/searchlib/src/vespa/searchlib/features/distancetopathfeature.h
+++ b/searchlib/src/vespa/searchlib/features/distancetopathfeature.h
@@ -10,7 +10,7 @@ namespace search::features {
  * Define the point type that makes up the end-points in our path.
  */
 struct Vector2 {
-    Vector2(double _x, double _y) : x(_x), y(_y) { }
+    Vector2(double _x, double _y) noexcept : x(_x), y(_y) { }
     double x, y;
 };
 
@@ -51,8 +51,8 @@ public:
     DistanceToPathBlueprint();
     ~DistanceToPathBlueprint() override;
     void visitDumpFeatures(const fef::IIndexEnvironment &env, fef::IDumpFeatureVisitor &visitor) const override;
-    fef::Blueprint::UP createInstance() const override;
-    fef::ParameterDescriptions getDescriptions() const override {
+    [[nodiscard]] fef::Blueprint::UP createInstance() const override;
+    [[nodiscard]] fef::ParameterDescriptions getDescriptions() const override {
         return fef::ParameterDescriptions().desc().string();
     }
 
diff --git a/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp b/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp
index 3c816870a6d..0030641f8da 100644
--- a/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp
+++ b/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp
@@ -2,8 +2,7 @@
 
 #include "functiontablefactory.h"
 #include <vespa/vespalib/locale/c.h>
-#include <boost/algorithm/string/split.hpp>
-#include <boost/algorithm/string/classification.hpp>
+#include <vespa/vespalib/text/stringtokenizer.h>
 #include <cmath>
 #include <vespa/log/log.h>
 LOG_SETUP(".fef.functiontablefactory");
@@ -53,7 +52,7 @@ FunctionTableFactory::createExpDecay(double w, double t, size_t len) const
 Table::SP
 FunctionTableFactory::createLogGrowth(double w, double t, double s, size_t len) const
 {
-    Table::SP table(new Table());
+    auto table = std::make_shared<Table>();
     for (size_t x = 0; x < len; ++x) {
         table->add(w * (std::log(1 + (x / s))) + t);
     }
@@ -105,7 +104,7 @@ FunctionTableFactory::createTable(const vespalib::string & name) const
     } else {
         LOG(warning, "Cannot create table for function '%s'. Could not be parsed.", name.c_str());
     }
-    return Table::SP(NULL);
+    return {};
 }
 
 bool
@@ -124,7 +123,10 @@ FunctionTableFactory::parseFunctionName(const vespalib::string & name, ParsedNam
     parsed.type = name.substr(0, ps);
     vespalib::string args = name.substr(ps + 1, pe - ps - 1);
     if (!args.empty()) {
-        boost::split(parsed.args, args, boost::is_any_of(","));
+        vespalib::StringTokenizer tokenizer(args);
+        for (const auto & token : tokenizer) {
+            parsed.args.emplace_back(token);
+        }
     }
     return true;
 }
diff --git a/searchlib/src/vespa/searchlib/fef/functiontablefactory.h b/searchlib/src/vespa/searchlib/fef/functiontablefactory.h
index f95df47973c..cdb5711cd18 100644
--- a/searchlib/src/vespa/searchlib/fef/functiontablefactory.h
+++ b/searchlib/src/vespa/searchlib/fef/functiontablefactory.h
@@ -21,36 +21,34 @@ public:
     struct ParsedName {
         vespalib::string type;
         std::vector<vespalib::string> args;
-        ParsedName() : type(), args() {}
+        ParsedName() noexcept : type(), args() {}
     };
 
-private:
-    size_t _defaultTableSize;
-
-    bool checkArgs(const std::vector<vespalib::string> & args, size_t exp, size_t & tableSize) const;
-    bool isSupported(const vespalib::string & type) const;
-    bool isExpDecay(const vespalib::string & type) const { return type == "expdecay"; }
-    bool isLogGrowth(const vespalib::string & type) const { return type == "loggrowth"; }
-    bool isLinear(const vespalib::string & type) const { return type == "linear"; }
-    Table::SP createExpDecay(double w, double t, size_t len) const;
-    Table::SP createLogGrowth(double w, double t, double s, size_t len) const;
-    Table::SP createLinear(double w, double t, size_t len) const;
-
-public:
     /**
      * Creates a new factory able to create tables with the given default size.
      **/
-    FunctionTableFactory(size_t defaultTableSize);
+    explicit FunctionTableFactory(size_t defaultTableSize);
 
     /**
      * Creates a table where the given name specifies the function and arguments to use.
      **/
-    Table::SP createTable(const vespalib::string & name) const override;
+    [[nodiscard]] Table::SP createTable(const vespalib::string & name) const override;
 
     /**
      * Parses the given function name and returns true if success.
      **/
     static bool parseFunctionName(const vespalib::string & name, ParsedName & parsed);
+private:
+    size_t _defaultTableSize;
+
+    bool checkArgs(const std::vector<vespalib::string> & args, size_t exp, size_t & tableSize) const;
+    bool isSupported(const vespalib::string & type) const;
+    bool isExpDecay(const vespalib::string & type) const { return type == "expdecay"; }
+    bool isLogGrowth(const vespalib::string & type) const { return type == "loggrowth"; }
+    bool isLinear(const vespalib::string & type) const { return type == "linear"; }
+    Table::SP createExpDecay(double w, double t, size_t len) const;
+    Table::SP createLogGrowth(double w, double t, double s, size_t len) const;
+    Table::SP createLinear(double w, double t, size_t len) const;
 };
 
 }
diff --git a/vespalib/src/vespa/vespalib/text/stringtokenizer.cpp b/vespalib/src/vespa/vespalib/text/stringtokenizer.cpp
index 17f08ea7f05..da7fc262669 100644
--- a/vespalib/src/vespa/vespalib/text/stringtokenizer.cpp
+++ b/vespalib/src/vespa/vespalib/text/stringtokenizer.cpp
@@ -7,13 +7,13 @@ namespace {
 class AsciiSet
 {
 public:
-    AsciiSet(vespalib::stringref s) {
+    explicit AsciiSet(vespalib::stringref s) {
         memset(_set, 0, sizeof(_set));
-        for (size_t i(0), m(s.size()); i < m; i++) {
-            add(s[i]);
+        for (char c : s) {
+            add(c);
         }
     }
-    bool contains(uint8_t c) const {
+    [[nodiscard]] bool contains(uint8_t c) const {
         return _set[c];
     }
     void add(uint8_t c) {
@@ -48,8 +48,8 @@ stripString(vespalib::stringref source, const AsciiSet & strip)
 size_t
 countSeparators(vespalib::stringref source, const AsciiSet & sep) {
     size_t count(0);
-    for (Token::size_type i = 0; i < source.size(); ++i) {
-        if (sep.contains(source[i])) {
+    for (char c : source) {
+        if (sep.contains(c)) {
             count++;
         }
     }
@@ -68,7 +68,7 @@ parse(TokenList& output, vespalib::stringref source, const AsciiSet & separators
     }
     output.push_back(stripString(source.substr(start), strip));
     // Don't keep a single empty element
-    if (output.size() == 1 && output[0].size() == 0) output.pop_back();
+    if (output.size() == 1 && output[0].empty()) output.pop_back();
 }
 
 } // private namespace
@@ -86,6 +86,8 @@ StringTokenizer::StringTokenizer(vespalib::stringref source,
     parse(_tokens, source, sep, str);
 }
 
+StringTokenizer::~StringTokenizer() = default;
+
 void
 StringTokenizer::removeEmptyTokens()
 {
diff --git a/vespalib/src/vespa/vespalib/text/stringtokenizer.h b/vespalib/src/vespa/vespalib/text/stringtokenizer.h
index a2c9b3520a1..49f0e6d1050 100644
--- a/vespalib/src/vespa/vespalib/text/stringtokenizer.h
+++ b/vespalib/src/vespa/vespalib/text/stringtokenizer.h
@@ -41,25 +41,34 @@ public:
      * @param separators The characters to be used as token separators
      * @param strip      Characters to be stripped from both ends of each token
      **/
+    explicit StringTokenizer(vespalib::stringref source)
+        : StringTokenizer(source, ",")
+    {}
     StringTokenizer(vespalib::stringref source,
-                    vespalib::stringref separators = ",",
-                    vespalib::stringref strip = " \t\f\r\n");
+                             vespalib::stringref separators)
+        : StringTokenizer(source, separators, " \t\f\r\n")
+    {}
+    StringTokenizer(vespalib::stringref source,
+                    vespalib::stringref separators,
+                    vespalib::stringref strip);
+    StringTokenizer(StringTokenizer &&) noexcept = default;
+    StringTokenizer & operator=(StringTokenizer &&) noexcept = default;
+    ~StringTokenizer();
 
     /** Remove any empty tokens from the token list */
     void removeEmptyTokens();
 
     /** How many tokens is in the current token list? */
-    unsigned int size() const { return _tokens.size(); }
+    [[nodiscard]] unsigned int size() const { return _tokens.size(); }
 
     /** Access a token from the current token list */
-    const Token & operator[](unsigned int index) const
-        { return _tokens[index]; }
+    const Token & operator[](unsigned int index) const { return _tokens[index]; }
 
-    Iterator begin() const { return _tokens.begin(); }
-    Iterator end() const { return _tokens.end(); }
+    [[nodiscard]] Iterator begin() const { return _tokens.begin(); }
+    [[nodiscard]] Iterator end() const { return _tokens.end(); }
 
     /** Access the entire token list */
-    const TokenList & getTokens() const { return _tokens; }
+    [[nodiscard]] const TokenList & getTokens() const { return _tokens; }
 
 private:
     TokenList _tokens;
author	Henning Baldersheim <balder@yahoo-inc.com>	2023-02-03 21:35:21 +0000
committer	Henning Baldersheim <balder@yahoo-inc.com>	2023-02-03 21:35:21 +0000
commit	402bdf03a13360af51116bd10919e200952dafd0 (patch)
tree	4eb09cea7b60915789867c07e041f4d584d5a5c0
parent	972491cd956d6cbc6bf718c5319bcd973997dc17 (diff)