diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2019-05-22 22:32:10 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2019-05-22 22:48:22 +0000 |
commit | aa839c11eb6861401026f6fff3b4c2e68426f71c (patch) | |
tree | aa57a409b60acf306c74ac425780f8ec8150e782 | |
parent | 8e998d910756843efafb16e03690eec863af4ce6 (diff) |
Add optimized support for array<byte>
11 files changed, 105 insertions, 39 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 51a98bec3d9..626a470cb5c 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1,6 +1,4 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/log/log.h> -LOG_SETUP("prod_features_test"); #include "prod_features.h" #include <vespa/searchlib/attribute/attributeguard.h> @@ -50,6 +48,9 @@ LOG_SETUP("prod_features_test"); #include <vespa/vespalib/util/string_hash.h> #include <cmath> +#include <vespa/log/log.h> +LOG_SETUP("prod_features_test"); + using namespace search::features; using namespace search::fef; using namespace search::fef::test; @@ -1065,6 +1066,25 @@ verifyCorrectDotProductExecutor(BlueprintFactory & factory, vespalib::stringref EXPECT_EQUAL(1u, deps.output.size()); } +template<typename T> +void verifyArrayParser() +{ + std::vector<vespalib::string> v = {"(0:2,7:-3,1:-3)", "{0:2,7:-3,1:-3}", "[2 -3 0 0 0 0 0 -3]"}; + for(const vespalib::string & s : v) { + std::vector<T> out; + ArrayParser::parse(s, out); + EXPECT_EQUAL(8u, out.size()); + EXPECT_EQUAL(2, out[0]); + EXPECT_EQUAL(-3, out[1]); + EXPECT_EQUAL(0, out[2]); + EXPECT_EQUAL(0, out[3]); + EXPECT_EQUAL(0, out[4]); + EXPECT_EQUAL(0, out[5]); + EXPECT_EQUAL(0, out[6]); + EXPECT_EQUAL(-3, out[7]); + } +} + } void @@ -1159,22 +1179,12 @@ Test::testDotProduct() EXPECT_EQUAL(out.getVector()[1].second, -3.5); } } - { // Array parser - std::vector<vespalib::string> v = {"(0:2,7:-3,1:-3)", "{0:2,7:-3,1:-3}", "[2 -3 0 0 0 0 0 -3]"}; - for(const vespalib::string & s : v) { - std::vector<int32_t> out; - ArrayParser::parse(s, out); - EXPECT_EQUAL(8u, out.size()); - EXPECT_EQUAL(2, out[0]); - EXPECT_EQUAL(-3, out[1]); - EXPECT_EQUAL(0, out[2]); - EXPECT_EQUAL(0, out[3]); - EXPECT_EQUAL(0, out[4]); - EXPECT_EQUAL(0, out[5]); - EXPECT_EQUAL(0, out[6]); - EXPECT_EQUAL(-3, out[7]); - } - } + verifyArrayParser<int8_t>(); + verifyArrayParser<int16_t>(); + verifyArrayParser<int32_t>(); + verifyArrayParser<int64_t>(); + verifyArrayParser<float>(); + verifyArrayParser<double>(); { vespalib::string s = "[[1:3]]"; std::vector<int32_t> out; @@ -1202,13 +1212,12 @@ Test::testDotProduct() assertDotProduct(0, "(f:5,g:5)", 1, "wsextstr"); assertDotProduct(550, "(a:1,b:2,c:3,d:4,e:5)", 1, "wsextstr"); } - { // integer attribute - assertDotProduct(0, "()", 1, "wsint"); - assertDotProduct(0, "(6:5,7:5)", 1, "wsint"); - assertDotProduct(55, "(1:1,2:2,3:3,4:4,5:5)", 1, "wsint"); + for (const char * name : {"wsbyte", "wsint"}) { + assertDotProduct(0, "()", 1, name); + assertDotProduct(0, "(6:5,7:5)", 1, name); + assertDotProduct(55, "(1:1,2:2,3:3,4:4,5:5)", 1, name); } - std::vector<const char *> attributes = {"arrint", "arrfloat", "arrint_fast", "arrfloat_fast"}; - for (const char * name : attributes) { + for (const char * name : {"arrbyte", "arrint", "arrfloat", "arrint_fast", "arrfloat_fast"}) { assertDotProduct(0, "()", 1, name); assertDotProduct(0, "(6:5,7:5)", 1, name); assertDotProduct(55, "(0:1,1:2,2:3,3:4,4:5)", 1, name); @@ -1260,6 +1269,8 @@ Test::setupForDotProductTest(FtFeatureTest & ft) bool fastSearch; }; std::vector<Config> cfgList = { {"wsint", AVBT::INT32, AVCT::WSET, false}, + {"wsbyte", AVBT::INT8, AVCT::WSET, false}, + {"arrbyte", AVBT::INT8, AVCT::ARRAY, false}, {"arrint", AVBT::INT32, AVCT::ARRAY, false}, {"arrfloat", AVBT::FLOAT, AVCT::ARRAY, false}, {"arrint_fast", AVBT::INT32, AVCT::ARRAY, true}, diff --git a/searchlib/src/vespa/searchlib/features/array_parser.cpp b/searchlib/src/vespa/searchlib/features/array_parser.cpp index 7f6a5da7d4c..edfaed015e8 100644 --- a/searchlib/src/vespa/searchlib/features/array_parser.cpp +++ b/searchlib/src/vespa/searchlib/features/array_parser.cpp @@ -1,12 +1,17 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "array_parser.h" +#include "array_parser.hpp" #include <vespa/log/log.h> LOG_SETUP(".features.array_parser"); -namespace search { -namespace features { +namespace search::features { + +void +ArrayParser::parse(const vespalib::string &input, std::vector<int8_t> &output) +{ + parse<std::vector<int8_t>, int16_t>(input, output); +} void ArrayParser::logWarning(const vespalib::string &msg) @@ -14,5 +19,4 @@ ArrayParser::logWarning(const vespalib::string &msg) LOG(warning, "%s", msg.c_str()); } -} // namespace features -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/features/array_parser.h b/searchlib/src/vespa/searchlib/features/array_parser.h index 53247eabe2c..8bf6e9ca365 100644 --- a/searchlib/src/vespa/searchlib/features/array_parser.h +++ b/searchlib/src/vespa/searchlib/features/array_parser.h @@ -3,6 +3,7 @@ #pragma once #include <vespa/vespalib/stllike/string.h> +#include <vector> namespace search::features { @@ -37,9 +38,11 @@ public: uint32_t _index; }; - template <typename OutputType> + template <typename OutputType, typename T = typename OutputType::value_type> static void parse(const vespalib::string &input, OutputType &output); + static void parse(const vespalib::string &input, std::vector<int8_t> &output); + template <typename OutputType> static void parsePartial(const vespalib::string &input, OutputType &output); }; diff --git a/searchlib/src/vespa/searchlib/features/array_parser.hpp b/searchlib/src/vespa/searchlib/features/array_parser.hpp index 92abec3aab9..089cb038797 100644 --- a/searchlib/src/vespa/searchlib/features/array_parser.hpp +++ b/searchlib/src/vespa/searchlib/features/array_parser.hpp @@ -11,11 +11,11 @@ namespace search::features { -template <typename OutputType> +template <typename OutputType, typename T> void ArrayParser::parse(const vespalib::string &input, OutputType &output) { - typedef std::vector<ValueAndIndex<typename OutputType::value_type>> SparseVector; + typedef std::vector<ValueAndIndex<T>> SparseVector; SparseVector sparse; parsePartial(input, sparse); std::sort(sparse.begin(), sparse.end()); @@ -47,7 +47,7 @@ ArrayParser::parsePartial(const vespalib::string &input, OutputType &output) try { is >> key >> colon >> value; if ((colon == ':') && is.eof()) { - output.push_back(ValueAndIndexType(value, key)); + output.emplace_back(value, key); } else { logWarning(vespalib::make_string( "Could not parse item '%s' in query vector '%s', skipping. " @@ -73,7 +73,7 @@ ArrayParser::parsePartial(const vespalib::string &input, OutputType &output) while (!is.eof()) { try { is >> value; - output.push_back(ValueAndIndexType(value, index++)); + output.emplace_back(value, index++); } catch (vespalib::IllegalArgumentException & e) { logWarning(vespalib::make_string( "Could not parse item[%ld] = '%s' in query vector '%s', skipping. " diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp index 021320699ec..97216ef5437 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp @@ -398,11 +398,11 @@ DotProductBlueprint::createInstance() const namespace { -template <typename T> +template <typename T, typename AsT = T> void parseVectors(const Property& prop, std::vector<T>& values, std::vector<uint32_t>& indexes) { - typedef std::vector<ArrayParser::ValueAndIndex<T>> SparseV; + typedef std::vector<ArrayParser::ValueAndIndex<AsT>> SparseV; SparseV sparse; ArrayParser::parsePartial(prop.get(), sparse); if ( ! sparse.empty()) { @@ -423,6 +423,12 @@ parseVectors(const Property& prop, std::vector<T>& values, std::vector<uint32_t> } } +template <> +void +parseVectors<int8_t, int8_t>(const Property& prop, std::vector<int8_t>& values, std::vector<uint32_t>& indexes) { + parseVectors<int8_t, int16_t>(prop, values, indexes); +} + } namespace dotproduct { diff --git a/searchlib/src/vespa/searchlib/features/utils.cpp b/searchlib/src/vespa/searchlib/features/utils.cpp index 86558e89ec9..3f68e69ff25 100644 --- a/searchlib/src/vespa/searchlib/features/utils.cpp +++ b/searchlib/src/vespa/searchlib/features/utils.cpp @@ -4,10 +4,25 @@ namespace search::features::util { -template double strToNum<double>(vespalib::stringref str); +template <> +uint8_t +strToNum<uint8_t>(vespalib::stringref str) { + return strToNum<uint16_t>(str); +} + +template <> +int8_t +strToNum<int8_t>(vespalib::stringref str) { + return strToNum<int16_t>(str); +} + +template double strToNum<double>(vespalib::stringref str); +template float strToNum<float>(vespalib::stringref str); +template uint16_t strToNum<uint16_t>(vespalib::stringref str); template uint32_t strToNum<uint32_t>(vespalib::stringref str); template uint64_t strToNum<uint64_t>(vespalib::stringref str); -template int32_t strToNum<int32_t>(vespalib::stringref str); -template int64_t strToNum<int64_t>(vespalib::stringref str); +template int16_t strToNum<int16_t>(vespalib::stringref str); +template int32_t strToNum<int32_t>(vespalib::stringref str); +template int64_t strToNum<int64_t>(vespalib::stringref str); } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp index 1bf7ea1c44c..d70071525c6 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp @@ -71,6 +71,17 @@ GenericAccelrator::dotProduct(const double * a, const double * b, size_t sz) con } int64_t +GenericAccelrator::dotProduct(const int8_t * a, const int8_t * b, size_t sz) const +{ + return multiplyAdd<int64_t, int8_t, 4>(a, b, sz); +} + +int64_t +GenericAccelrator::dotProduct(const int16_t * a, const int16_t * b, size_t sz) const +{ + return multiplyAdd<int64_t, int16_t, 4>(a, b, sz); +} +int64_t GenericAccelrator::dotProduct(const int32_t * a, const int32_t * b, size_t sz) const { return multiplyAdd<int64_t, int32_t, 4>(a, b, sz); diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h index 0115d5c55c7..f9aab3ae845 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h @@ -14,6 +14,8 @@ class GenericAccelrator : public IAccelrated public: float dotProduct(const float * a, const float * b, size_t sz) const override; double dotProduct(const double * a, const double * b, size_t sz) const override; + int64_t dotProduct(const int8_t * a, const int8_t * b, size_t sz) const override; + int64_t dotProduct(const int16_t * a, const int16_t * b, size_t sz) const override; int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const override; long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const override; void orBit(void * a, const void * b, size_t bytes) const override; diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h index 7c2f2547240..aae60279d06 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h @@ -18,6 +18,8 @@ public: typedef std::unique_ptr<IAccelrated> UP; virtual float dotProduct(const float * a, const float * b, size_t sz) const = 0; virtual double dotProduct(const double * a, const double * b, size_t sz) const = 0; + virtual int64_t dotProduct(const int8_t * a, const int8_t * b, size_t sz) const = 0; + virtual int64_t dotProduct(const int16_t * a, const int16_t * b, size_t sz) const = 0; virtual int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const = 0; virtual long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const = 0; virtual void orBit(void * a, const void * b, size_t bytes) const = 0; diff --git a/vespalib/src/vespa/vespalib/stllike/asciistream.cpp b/vespalib/src/vespa/vespalib/stllike/asciistream.cpp index c141d35e80e..30a963c374c 100644 --- a/vespalib/src/vespa/vespalib/stllike/asciistream.cpp +++ b/vespalib/src/vespa/vespalib/stllike/asciistream.cpp @@ -235,6 +235,17 @@ asciistream & asciistream::operator >> (char & v) return *this; } +asciistream & asciistream::operator >> (signed char & v) +{ + for (;(_rPos < length()) && std::isspace(_rbuf[_rPos]); _rPos++); + if (_rPos < length()) { + v = _rbuf[_rPos++]; + } else { + throwUnderflow(_rPos); + } + return *this; +} + asciistream & asciistream::operator >> (unsigned char & v) { for (;(_rPos < length()) && std::isspace(_rbuf[_rPos]); _rPos++); diff --git a/vespalib/src/vespa/vespalib/stllike/asciistream.h b/vespalib/src/vespa/vespalib/stllike/asciistream.h index 88c3e1f7fc8..2f11d902283 100644 --- a/vespalib/src/vespa/vespalib/stllike/asciistream.h +++ b/vespalib/src/vespa/vespalib/stllike/asciistream.h @@ -64,6 +64,7 @@ public: asciistream & operator >> (FloatModifier v) { _floatModifier = v; return *this; } asciistream & operator >> (bool & v); asciistream & operator >> (char & v); + asciistream & operator >> (signed char & v); asciistream & operator >> (unsigned char & v); asciistream & operator >> (std::string & v); asciistream & operator >> (string & v); |