summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2021-09-22 15:40:53 +0200
committerGitHub <noreply@github.com>2021-09-22 15:40:53 +0200
commita1821d4e2275e1f2071a38123dfdd2c57f4313e2 (patch)
tree5db2f3d34e0f07cd06a7777dba22e89e30e237ab
parent1e1fd130368c357f18389fd1c8b43dc99e2a5fd1 (diff)
parent2293c5de7dfdf0adf738eea13adddaa8ef5fab27 (diff)
Merge pull request #19242 from vespa-engine/havardpe/hamming-function
added new 'hamming' function
-rw-r--r--eval/src/apps/tensor_conformance/generate.cpp17
-rw-r--r--eval/src/apps/tensor_conformance/generate.h5
-rw-r--r--eval/src/apps/tensor_conformance/tensor_conformance.cpp9
-rw-r--r--eval/src/tests/eval/inline_operation/inline_operation_test.cpp1
-rw-r--r--eval/src/tests/eval/node_tools/node_tools_test.cpp1
-rw-r--r--eval/src/tests/eval/node_types/node_types_test.cpp1
-rw-r--r--eval/src/vespa/eval/eval/call_nodes.cpp1
-rw-r--r--eval/src/vespa/eval/eval/call_nodes.h1
-rw-r--r--eval/src/vespa/eval/eval/hamming_distance.h13
-rw-r--r--eval/src/vespa/eval/eval/key_gen.cpp1
-rw-r--r--eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp5
-rw-r--r--eval/src/vespa/eval/eval/llvm/llvm_wrapper.h1
-rw-r--r--eval/src/vespa/eval/eval/make_tensor_function.cpp3
-rw-r--r--eval/src/vespa/eval/eval/node_tools.cpp1
-rw-r--r--eval/src/vespa/eval/eval/node_types.cpp1
-rw-r--r--eval/src/vespa/eval/eval/node_visitor.h2
-rw-r--r--eval/src/vespa/eval/eval/operation.cpp3
-rw-r--r--eval/src/vespa/eval/eval/operation.h1
-rw-r--r--eval/src/vespa/eval/eval/test/eval_spec.cpp21
-rw-r--r--eval/src/vespa/eval/eval/test/reference_evaluation.cpp3
-rw-r--r--eval/src/vespa/eval/eval/visit_stuff.cpp1
21 files changed, 87 insertions, 5 deletions
diff --git a/eval/src/apps/tensor_conformance/generate.cpp b/eval/src/apps/tensor_conformance/generate.cpp
index 8a596ad38d4..4d757f6ebbc 100644
--- a/eval/src/apps/tensor_conformance/generate.cpp
+++ b/eval/src/apps/tensor_conformance/generate.cpp
@@ -14,6 +14,19 @@ using vespalib::make_string_short::fmt;
namespace {
+struct IgnoreJava : TestBuilder {
+ TestBuilder &dst;
+ IgnoreJava(TestBuilder &dst_in) : TestBuilder(dst_in.full), dst(dst_in) {}
+ void add(const vespalib::string &expression,
+ const std::map<vespalib::string,TensorSpec> &inputs,
+ const std::set<vespalib::string> &ignore) override
+ {
+ auto my_ignore = ignore;
+ my_ignore.insert("vespajlib");
+ dst.add(expression, inputs, my_ignore);
+ }
+};
+
//-----------------------------------------------------------------------------
const std::vector<vespalib::string> basic_layouts = {
@@ -273,6 +286,8 @@ void generate_join(TestBuilder &dst) {
generate_op2_join("min(a,b)", Div16(N()), dst);
generate_op2_join("max(a,b)", Div16(N()), dst);
generate_op2_join("bit(a,b)", Seq({-128, -43, -1, 0, 85, 127}), Seq({0, 1, 2, 3, 4, 5, 6, 7}), dst);
+ IgnoreJava ignore_java(dst);
+ generate_op2_join("hamming(a,b)", Seq({-128, -43, -1, 0, 85, 127}), ignore_java); // TODO: require java
// inverted lambda
generate_join_expr("join(a,b,f(a,b)(b-a))", Div16(N()), dst);
// custom lambda
@@ -331,6 +346,8 @@ void generate_merge(TestBuilder &dst) {
generate_op2_merge("min(a,b)", Div16(N()), dst);
generate_op2_merge("max(a,b)", Div16(N()), dst);
generate_op2_merge("bit(a,b)", Seq({-128, -43, -1, 0, 85, 127}), Seq({0, 1, 2, 3, 4, 5, 6, 7}), dst);
+ IgnoreJava ignore_java(dst);
+ generate_op2_merge("hamming(a,b)", Seq({-128, -43, -1, 0, 85, 127}), ignore_java); // TODO: require java
// inverted lambda
generate_merge_expr("merge(a,b,f(a,b)(b-a))", Div16(N()), dst);
// custom lambda
diff --git a/eval/src/apps/tensor_conformance/generate.h b/eval/src/apps/tensor_conformance/generate.h
index e9482b9015c..9aa90ae9a7a 100644
--- a/eval/src/apps/tensor_conformance/generate.h
+++ b/eval/src/apps/tensor_conformance/generate.h
@@ -18,11 +18,6 @@ struct TestBuilder {
{
add(expression, inputs, {});
}
- void add_ignore_java(const vespalib::string &expression,
- const std::map<vespalib::string,TensorSpec> &inputs)
- {
- add(expression, inputs, {"vespajlib"});
- }
virtual ~TestBuilder() {}
};
diff --git a/eval/src/apps/tensor_conformance/tensor_conformance.cpp b/eval/src/apps/tensor_conformance/tensor_conformance.cpp
index e6bbb1f8a41..6c28b1e652e 100644
--- a/eval/src/apps/tensor_conformance/tensor_conformance.cpp
+++ b/eval/src/apps/tensor_conformance/tensor_conformance.cpp
@@ -167,6 +167,15 @@ void print_test(const Inspector &test, OutputWriter &dst) {
}
auto result = eval_expr(test, prod_factory);
dst.printf("result: %s\n", result.to_string().c_str());
+ auto ignore = extract_fields(test["ignore"]);
+ if (!ignore.empty()) {
+ dst.printf("ignore:");
+ for (const auto &impl: ignore) {
+ REQUIRE(test["ignore"][impl].asBool());
+ dst.printf(" %s", impl.c_str());
+ }
+ dst.printf("\n");
+ }
}
//-----------------------------------------------------------------------------
diff --git a/eval/src/tests/eval/inline_operation/inline_operation_test.cpp b/eval/src/tests/eval/inline_operation/inline_operation_test.cpp
index ae5f503b680..8e765708574 100644
--- a/eval/src/tests/eval/inline_operation/inline_operation_test.cpp
+++ b/eval/src/tests/eval/inline_operation/inline_operation_test.cpp
@@ -116,6 +116,7 @@ TEST(InlineOperationTest, op2_lambdas_are_recognized) {
EXPECT_EQ(as_op2("min(a,b)"), &Min::f);
EXPECT_EQ(as_op2("max(a,b)"), &Max::f);
EXPECT_EQ(as_op2("bit(a,b)"), &Bit::f);
+ EXPECT_EQ(as_op2("hamming(a,b)"), &Hamming::f);
}
TEST(InlineOperationTest, op2_lambdas_are_recognized_with_different_parameter_names) {
diff --git a/eval/src/tests/eval/node_tools/node_tools_test.cpp b/eval/src/tests/eval/node_tools/node_tools_test.cpp
index e8296c01d73..b95ea2d4b14 100644
--- a/eval/src/tests/eval/node_tools/node_tools_test.cpp
+++ b/eval/src/tests/eval/node_tools/node_tools_test.cpp
@@ -101,6 +101,7 @@ TEST("require that call node types can be copied") {
TEST_DO(verify_copy("elu(a)"));
TEST_DO(verify_copy("erf(a)"));
TEST_DO(verify_copy("bit(a,b)"));
+ TEST_DO(verify_copy("hamming(a,b)"));
}
TEST("require that tensor node types can NOT be copied (yet)") {
diff --git a/eval/src/tests/eval/node_types/node_types_test.cpp b/eval/src/tests/eval/node_types/node_types_test.cpp
index b2373f0d8f5..5b860f0e1b3 100644
--- a/eval/src/tests/eval/node_types/node_types_test.cpp
+++ b/eval/src/tests/eval/node_types/node_types_test.cpp
@@ -219,6 +219,7 @@ TEST("require that various operations resolve appropriate type") {
TEST_DO(verify_op1("elu(%s)")); // Elu
TEST_DO(verify_op1("erf(%s)")); // Erf
TEST_DO(verify_op2("bit(%s,%s)")); // Bit
+ TEST_DO(verify_op2("hamming(%s,%s)")); // Hamming
}
TEST("require that map resolves correct type") {
diff --git a/eval/src/vespa/eval/eval/call_nodes.cpp b/eval/src/vespa/eval/eval/call_nodes.cpp
index 798583cf89a..95dbecdd153 100644
--- a/eval/src/vespa/eval/eval/call_nodes.cpp
+++ b/eval/src/vespa/eval/eval/call_nodes.cpp
@@ -44,6 +44,7 @@ CallRepo::CallRepo() : _map() {
add(nodes::Elu());
add(nodes::Erf());
add(nodes::Bit());
+ add(nodes::Hamming());
}
} // namespace vespalib::eval::nodes
diff --git a/eval/src/vespa/eval/eval/call_nodes.h b/eval/src/vespa/eval/eval/call_nodes.h
index 945aba69596..47fc5d6eccd 100644
--- a/eval/src/vespa/eval/eval/call_nodes.h
+++ b/eval/src/vespa/eval/eval/call_nodes.h
@@ -140,6 +140,7 @@ struct Sigmoid : CallHelper<Sigmoid> { Sigmoid() : Helper("sigmoid", 1) {} };
struct Elu : CallHelper<Elu> { Elu() : Helper("elu", 1) {} };
struct Erf : CallHelper<Erf> { Erf() : Helper("erf", 1) {} };
struct Bit : CallHelper<Bit> { Bit() : Helper("bit", 2) {} };
+struct Hamming : CallHelper<Hamming> { Hamming() : Helper("hamming", 2) {} };
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/hamming_distance.h b/eval/src/vespa/eval/eval/hamming_distance.h
new file mode 100644
index 00000000000..3419de3569f
--- /dev/null
+++ b/eval/src/vespa/eval/eval/hamming_distance.h
@@ -0,0 +1,13 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace vespalib::eval {
+
+inline double hamming_distance(double a, double b) {
+ uint8_t x = (uint8_t) a;
+ uint8_t y = (uint8_t) b;
+ return __builtin_popcount(x ^ y);
+}
+
+}
diff --git a/eval/src/vespa/eval/eval/key_gen.cpp b/eval/src/vespa/eval/eval/key_gen.cpp
index a40a8887119..cbbce61402c 100644
--- a/eval/src/vespa/eval/eval/key_gen.cpp
+++ b/eval/src/vespa/eval/eval/key_gen.cpp
@@ -88,6 +88,7 @@ struct KeyGen : public NodeVisitor, public NodeTraverser {
void visit(const Elu &) override { add_byte(61); }
void visit(const Erf &) override { add_byte(62); }
void visit(const Bit &) override { add_byte(63); }
+ void visit(const Hamming &) override { add_byte(64); }
// traverse
bool open(const Node &node) override { node.accept(*this); return true; }
diff --git a/eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp
index 3e4f4fe8257..a101745dca0 100644
--- a/eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp
+++ b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.cpp
@@ -5,6 +5,7 @@
#include <vespa/eval/eval/node_visitor.h>
#include <vespa/eval/eval/node_traverser.h>
#include <vespa/eval/eval/extract_bit.h>
+#include <vespa/eval/eval/hamming_distance.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/IR/IRBuilder.h>
@@ -31,6 +32,7 @@ double vespalib_eval_relu(double a) { return std::max(a, 0.0); }
double vespalib_eval_sigmoid(double a) { return 1.0 / (1.0 + std::exp(-1.0 * a)); }
double vespalib_eval_elu(double a) { return (a < 0) ? std::exp(a) - 1.0 : a; }
double vespalib_eval_bit(double a, double b) { return vespalib::eval::extract_bit(a, b); }
+double vespalib_eval_hamming(double a, double b) { return vespalib::eval::hamming_distance(a, b); }
using vespalib::eval::gbdt::Forest;
using resolve_function = double (*)(void *ctx, size_t idx);
@@ -651,6 +653,9 @@ struct FunctionBuilder : public NodeVisitor, public NodeTraverser {
void visit(const Bit &) override {
make_call_2("vespalib_eval_bit");
}
+ void visit(const Hamming &) override {
+ make_call_2("vespalib_eval_hamming");
+ }
};
FunctionBuilder::~FunctionBuilder() { }
diff --git a/eval/src/vespa/eval/eval/llvm/llvm_wrapper.h b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.h
index e04b477750d..727954d59e9 100644
--- a/eval/src/vespa/eval/eval/llvm/llvm_wrapper.h
+++ b/eval/src/vespa/eval/eval/llvm/llvm_wrapper.h
@@ -20,6 +20,7 @@ extern "C" {
double vespalib_eval_sigmoid(double a);
double vespalib_eval_elu(double a);
double vespalib_eval_bit(double a, double b);
+ double vespalib_eval_hamming(double a, double b);
};
namespace vespalib::eval {
diff --git a/eval/src/vespa/eval/eval/make_tensor_function.cpp b/eval/src/vespa/eval/eval/make_tensor_function.cpp
index 498be2a738b..7746676f86b 100644
--- a/eval/src/vespa/eval/eval/make_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/make_tensor_function.cpp
@@ -360,6 +360,9 @@ struct TensorFunctionBuilder : public NodeVisitor, public NodeTraverser {
void visit(const Bit &node) override {
make_join(node, operation::Bit::f);
}
+ void visit(const Hamming &node) override {
+ make_join(node, operation::Hamming::f);
+ }
//-------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/node_tools.cpp b/eval/src/vespa/eval/eval/node_tools.cpp
index fa2d16a2271..48ee1a90b67 100644
--- a/eval/src/vespa/eval/eval/node_tools.cpp
+++ b/eval/src/vespa/eval/eval/node_tools.cpp
@@ -183,6 +183,7 @@ struct CopyNode : NodeTraverser, NodeVisitor {
void visit(const Elu &node) override { copy_call(node); }
void visit(const Erf &node) override { copy_call(node); }
void visit(const Bit &node) override { copy_call(node); }
+ void visit(const Hamming &node) override { copy_call(node); }
// traverse nodes
bool open(const Node &) override { return !error; }
diff --git a/eval/src/vespa/eval/eval/node_types.cpp b/eval/src/vespa/eval/eval/node_types.cpp
index 8622fd734f1..2cb6e637201 100644
--- a/eval/src/vespa/eval/eval/node_types.cpp
+++ b/eval/src/vespa/eval/eval/node_types.cpp
@@ -279,6 +279,7 @@ struct TypeResolver : public NodeVisitor, public NodeTraverser {
void visit(const Elu &node) override { resolve_op1(node); }
void visit(const Erf &node) override { resolve_op1(node); }
void visit(const Bit &node) override { resolve_op2(node); }
+ void visit(const Hamming &node) override { resolve_op2(node); }
//-------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/node_visitor.h b/eval/src/vespa/eval/eval/node_visitor.h
index 475bbf5405c..b581a94f7ee 100644
--- a/eval/src/vespa/eval/eval/node_visitor.h
+++ b/eval/src/vespa/eval/eval/node_visitor.h
@@ -86,6 +86,7 @@ struct NodeVisitor {
virtual void visit(const nodes::Elu &) = 0;
virtual void visit(const nodes::Erf &) = 0;
virtual void visit(const nodes::Bit &) = 0;
+ virtual void visit(const nodes::Hamming &) = 0;
virtual ~NodeVisitor() {}
};
@@ -156,6 +157,7 @@ struct EmptyNodeVisitor : NodeVisitor {
void visit(const nodes::Elu &) override {}
void visit(const nodes::Erf &) override {}
void visit(const nodes::Bit &) override {}
+ void visit(const nodes::Hamming &) override {}
};
} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/eval/operation.cpp b/eval/src/vespa/eval/eval/operation.cpp
index a82a79e6bc4..ddd188d250f 100644
--- a/eval/src/vespa/eval/eval/operation.cpp
+++ b/eval/src/vespa/eval/eval/operation.cpp
@@ -4,6 +4,7 @@
#include "function.h"
#include "key_gen.h"
#include "extract_bit.h"
+#include "hamming_distance.h"
#include <vespa/vespalib/util/approx.h>
#include <algorithm>
@@ -52,6 +53,7 @@ double Sigmoid::f(double a) { return 1.0 / (1.0 + std::exp(-1.0 * a)); }
double Elu::f(double a) { return (a < 0) ? std::exp(a) - 1 : a; }
double Erf::f(double a) { return std::erf(a); }
double Bit::f(double a, double b) { return extract_bit(a, b); }
+double Hamming::f(double a, double b) { return hamming_distance(a, b); }
//-----------------------------------------------------------------------------
double Inv::f(double a) { return (1.0 / a); }
double Square::f(double a) { return (a * a); }
@@ -146,6 +148,7 @@ std::map<vespalib::string,op2_t> make_op2_map() {
add_op2(map, "min(a,b)", Min::f);
add_op2(map, "max(a,b)", Max::f);
add_op2(map, "bit(a,b)", Bit::f);
+ add_op2(map, "hamming(a,b)", Hamming::f);
return map;
}
diff --git a/eval/src/vespa/eval/eval/operation.h b/eval/src/vespa/eval/eval/operation.h
index 438b510b714..e2a524f318c 100644
--- a/eval/src/vespa/eval/eval/operation.h
+++ b/eval/src/vespa/eval/eval/operation.h
@@ -50,6 +50,7 @@ struct Sigmoid { static double f(double a); };
struct Elu { static double f(double a); };
struct Erf { static double f(double a); };
struct Bit { static double f(double a, double b); };
+struct Hamming { static double f(double a, double b); };
//-----------------------------------------------------------------------------
struct Inv { static double f(double a); };
struct Square { static double f(double a); };
diff --git a/eval/src/vespa/eval/eval/test/eval_spec.cpp b/eval/src/vespa/eval/eval/test/eval_spec.cpp
index 5d51a1d23b5..03b3af84fc9 100644
--- a/eval/src/vespa/eval/eval/test/eval_spec.cpp
+++ b/eval/src/vespa/eval/eval/test/eval_spec.cpp
@@ -8,6 +8,24 @@
namespace vespalib::eval::test {
+namespace {
+
+double byte(const vespalib::string &bits) {
+ int8_t res = 0;
+ assert(bits.size() == 8);
+ for (const auto &c: bits) {
+ if (c == '1') {
+ res = (res << 1) | 1;
+ } else {
+ assert(c == '0');
+ res = (res << 1);
+ }
+ }
+ return res;
+}
+
+} // <unnamed>
+
constexpr double my_nan = std::numeric_limits<double>::quiet_NaN();
constexpr double my_inf = std::numeric_limits<double>::infinity();
@@ -169,6 +187,9 @@ EvalSpec::add_function_call_cases() {
.add_case({85, 3}, 0.0).add_case({85, 2}, 1.0).add_case({85, 1}, 0.0).add_case({85, 0}, 1.0)
.add_case({127, 7}, 0.0).add_case({127, 6}, 1.0).add_case({127, 5}, 1.0).add_case({127, 4}, 1.0)
.add_case({127, 3}, 1.0).add_case({127, 2}, 1.0).add_case({127, 1}, 1.0).add_case({127, 0}, 1.0);
+ add_expression({"a", "b"}, "hamming(a,b)")
+ .add_case({0, 0}, 0.0).add_case({-1, -1}, 0.0).add_case({-1, 0}, 8.0).add_case({0, -1}, 8.0)
+ .add_case({byte("11001100"), byte("10101010")}, 4.0).add_case({byte("11001100"), byte("11110000")}, 4.0);
}
void
diff --git a/eval/src/vespa/eval/eval/test/reference_evaluation.cpp b/eval/src/vespa/eval/eval/test/reference_evaluation.cpp
index 58e4b91f6d9..def3f64c1a1 100644
--- a/eval/src/vespa/eval/eval/test/reference_evaluation.cpp
+++ b/eval/src/vespa/eval/eval/test/reference_evaluation.cpp
@@ -338,6 +338,9 @@ struct EvalNode : public NodeVisitor {
void visit(const Bit &node) override {
eval_join(node.get_child(0), node.get_child(1), operation::Bit::f);
}
+ void visit(const Hamming &node) override {
+ eval_join(node.get_child(0), node.get_child(1), operation::Hamming::f);
+ }
};
TensorSpec eval_node(const Node &node, const std::vector<TensorSpec> &params) {
diff --git a/eval/src/vespa/eval/eval/visit_stuff.cpp b/eval/src/vespa/eval/eval/visit_stuff.cpp
index 786562d823f..1d684e1c340 100644
--- a/eval/src/vespa/eval/eval/visit_stuff.cpp
+++ b/eval/src/vespa/eval/eval/visit_stuff.cpp
@@ -60,6 +60,7 @@ vespalib::string name_of(join_fun_t fun) {
if (fun == operation::Min::f) return "min";
if (fun == operation::Max::f) return "max";
if (fun == operation::Bit::f) return "bit";
+ if (fun == operation::Hamming::f) return "hamming";
return "[other join function]";
}