aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2023-09-25 16:39:53 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2023-09-26 14:42:47 +0000
commiteacc1e28f0b231a22d8c999eac5f9e79b88ecdb7 (patch)
tree8f0b93e81669c817c63880ab3d9367273cfc13db
parenta3f1ddde551d7c3092bcbdfb745e4e178da9be0f (diff)
use inline pre-generated tables
-rw-r--r--vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp109
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp90
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/table_dfa.h1
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp70
4 files changed, 226 insertions, 44 deletions
diff --git a/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp b/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp
index 7782a39c3c7..4ff48348790 100644
--- a/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp
+++ b/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp
@@ -172,20 +172,17 @@ TEST(TableDfaTest, format_bits) {
}
template <uint8_t N>
-void list_states(bool count_only = false) {
+void list_states() {
auto repo = make_state_repo<N>();
EXPECT_EQ(num_states<N>(), repo.size());
fprintf(stderr, "max_edits: %u, number of states: %zu\n", N, repo.size());
- if (!count_only) {
- for (uint32_t i = 0; i < repo.size(); ++i) {
- fprintf(stderr, " state %u: %s\n", i, repo.idx_to_state(i).to_string().c_str());
- }
+ for (uint32_t i = 0; i < repo.size(); ++i) {
+ fprintf(stderr, " state %u: %s\n", i, repo.idx_to_state(i).to_string().c_str());
}
}
TEST(TableDfaTest, list_states_for_max_edits_1) { list_states<1>(); }
TEST(TableDfaTest, list_states_for_max_edits_2) { list_states<2>(); }
-TEST(TableDfaTest, count_states_for_max_edits_3) { list_states<3>(true); }
template <uint8_t N>
void list_edits() {
@@ -295,4 +292,104 @@ TEST(TableDfaTest, graphviz_for_food_with_max_edits_1) {
fprintf(stderr, "%s", out.str().c_str());
}
+template <uint8_t N>
+void verify_inline_tfa() {
+ auto tfa = make_tfa<N>();
+ fprintf(stderr, "verifying TFA for N = %u (byte size: %zu)\n", N, sizeof(*tfa));
+ ASSERT_EQ(tfa->table.size(), num_states<N>());
+ ASSERT_EQ(tfa->edits.size(), num_states<N>());
+ for (size_t state = 0; state < num_states<N>(); ++state) {
+ ASSERT_EQ(tfa->table[state].size(), num_transitions<N>());
+ for (size_t transition = 0; transition < num_transitions<N>(); ++transition) {
+ EXPECT_EQ(tfa->table[state][transition].step, InlineTfa<N>::table[state][transition].step);
+ EXPECT_EQ(tfa->table[state][transition].state, InlineTfa<N>::table[state][transition].state);
+ }
+ ASSERT_EQ(tfa->edits[state].size(), window_size<N>());
+ for (size_t offset = 0; offset < window_size<N>(); ++offset) {
+ EXPECT_EQ(tfa->edits[state][offset], InlineTfa<N>::edits[state][offset]);
+ }
+ }
+}
+
+TEST(TableDfaTest, verify_inline_tfa_with_max_edits_1) {
+ verify_inline_tfa<1>();
+}
+
+TEST(TableDfaTest, verify_inline_tfa_with_max_edits_2) {
+ verify_inline_tfa<2>();
+}
+
+template <uint8_t N>
+void dump_tfa_as_code() {
+ auto tfa = make_tfa<N>();
+ fprintf(stderr, "// start of auto-generated code for N = %u\n", N);
+ fprintf(stderr, "template <> struct InlineTfa<%u> {\n", N);
+ fprintf(stderr, " static constexpr Transition table[%zu][%zu] = {\n", num_states<N>(), num_transitions<N>());
+ for (size_t state = 0; state < num_states<N>(); ++state) {
+ fprintf(stderr, " {");
+ for (size_t transition = 0; transition < num_transitions<N>(); ++transition) {
+ if (transition > 0) {
+ fprintf(stderr, ",");
+ }
+ fprintf(stderr, "{%u,%u}", tfa->table[state][transition].step, tfa->table[state][transition].state);
+ }
+ fprintf(stderr, "}%s\n", ((state + 1) < num_states<N>()) ? "," : "");
+ }
+ fprintf(stderr, " };\n");
+ fprintf(stderr, " static constexpr uint8_t edits[%zu][%zu] = {\n", num_states<N>(), window_size<N>());
+ for (size_t state = 0; state < num_states<N>(); ++state) {
+ fprintf(stderr, " {");
+ for (size_t offset = 0; offset < window_size<N>(); ++offset) {
+ if (offset > 0) {
+ fprintf(stderr, ",");
+ }
+ fprintf(stderr, "%u", tfa->edits[state][offset]);
+ }
+ fprintf(stderr, "}%s\n", ((state + 1) < num_states<N>()) ? "," : "");
+ }
+ fprintf(stderr, " };\n");
+ fprintf(stderr, "};\n");
+ fprintf(stderr, "// end of auto-generated code for N = %u\n", N);
+}
+
+TEST(TableDfaTest, dump_tfa_with_max_edits_1_as_code) {
+ dump_tfa_as_code<1>();
+}
+
+TEST(TableDfaTest, dump_tfa_with_max_edits_2_as_code) {
+ dump_tfa_as_code<2>();
+}
+
+template <uint8_t N>
+void dump_tfa_graph() {
+ auto repo = make_state_repo<N>();
+ fprintf(stderr, "digraph tfa {\n");
+ for (uint32_t idx = 0; idx < repo.size(); ++idx) {
+ fprintf(stderr, " %u [label=\"%s\"];\n", idx,
+ repo.idx_to_state(idx).to_string().c_str());
+ }
+ // omit transitions from the failure state to itself
+ for (uint32_t idx = 1; idx < repo.size(); ++idx) {
+ const State &state = repo.idx_to_state(idx);
+ for (uint32_t i = 0; i < num_transitions<N>(); ++i) {
+ auto bits = expand_bits<N>(i);
+ State new_state = state.next<N>(bits);
+ uint32_t step = new_state.normalize();
+ uint32_t new_idx = repo.state_to_idx(new_state);
+ ASSERT_LT(new_idx, repo.size());
+ if (bits[0] && idx == new_idx && step == 1) {
+ // omit simple transitions to yourself
+ } else {
+ fprintf(stderr, " %u -> %u [label=\"%s,%u\"];\n", idx, new_idx,
+ format_vector(bits, true).c_str(), step);
+ }
+ }
+ }
+ fprintf(stderr, "}\n");
+}
+
+TEST(TableDfaTest, graphviz_for_tfa_with_max_edits_1) {
+ dump_tfa_graph<1>();
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp
new file mode 100644
index 00000000000..fbec451bca0
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp
@@ -0,0 +1,90 @@
+// start of auto-generated code for N = 1
+template <> struct InlineTfa<1> {
+ static constexpr Transition table[6][8] = {
+ {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}},
+ {{0,2},{0,2},{0,3},{0,3},{1,1},{1,1},{1,1},{1,1}},
+ {{0,0},{0,0},{2,4},{2,4},{1,4},{1,4},{1,2},{1,2}},
+ {{0,0},{3,4},{2,4},{2,2},{1,4},{1,5},{1,2},{1,3}},
+ {{0,0},{0,0},{0,0},{0,0},{1,4},{1,4},{1,4},{1,4}},
+ {{0,0},{3,4},{0,0},{3,4},{1,4},{1,5},{1,4},{1,5}}
+ };
+ static constexpr uint8_t edits[6][3] = {
+ {2,2,2},
+ {0,1,2},
+ {1,1,2},
+ {1,1,1},
+ {1,2,2},
+ {1,2,1}
+ };
+};
+// end of auto-generated code for N = 1
+// start of auto-generated code for N = 2
+template <> struct InlineTfa<2> {
+ static constexpr Transition table[31][32] = {
+ {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}},
+ {{0,2},{0,2},{0,2},{0,2},{0,3},{0,3},{0,3},{0,3},{0,4},{0,4},{0,4},{0,4},{0,4},{0,4},{0,4},{0,4},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1}},
+ {{0,5},{0,5},{0,5},{0,5},{0,6},{0,6},{0,6},{0,6},{0,7},{0,7},{0,7},{0,7},{0,7},{0,7},{0,7},{0,7},{1,8},{1,8},{1,8},{1,8},{1,9},{1,9},{1,9},{1,9},{1,2},{1,2},{1,2},{1,2},{1,2},{1,2},{1,2},{1,2}},
+ {{0,5},{0,5},{0,10},{0,10},{0,6},{0,6},{0,11},{0,11},{0,7},{0,7},{0,12},{0,12},{0,7},{0,7},{0,12},{0,12},{1,8},{1,8},{1,13},{1,13},{1,9},{1,9},{1,14},{1,14},{1,2},{1,2},{1,3},{1,3},{1,2},{1,2},{1,3},{1,3}},
+ {{0,6},{0,6},{0,11},{0,11},{0,15},{0,15},{0,15},{0,15},{0,7},{0,7},{0,12},{0,12},{0,16},{0,16},{0,16},{0,16},{1,9},{1,9},{1,14},{1,14},{1,17},{1,17},{1,17},{1,17},{1,2},{1,2},{1,3},{1,3},{1,4},{1,4},{1,4},{1,4}},
+ {{0,0},{0,0},{0,0},{0,0},{3,18},{3,18},{3,18},{3,18},{2,18},{2,18},{2,18},{2,18},{2,19},{2,19},{2,19},{2,19},{1,18},{1,18},{1,18},{1,18},{1,20},{1,20},{1,20},{1,20},{1,19},{1,19},{1,19},{1,19},{1,5},{1,5},{1,5},{1,5}},
+ {{0,0},{0,0},{4,18},{4,18},{3,18},{3,18},{3,19},{3,19},{2,18},{2,18},{2,20},{2,20},{2,19},{2,19},{2,5},{2,5},{1,18},{1,18},{1,21},{1,21},{1,20},{1,20},{1,22},{1,22},{1,19},{1,19},{1,23},{1,23},{1,5},{1,5},{1,6},{1,6}},
+ {{2,19},{2,19},{2,5},{2,5},{3,8},{3,8},{3,8},{3,8},{2,19},{2,19},{2,5},{2,5},{3,8},{3,8},{3,8},{3,8},{1,5},{1,5},{1,6},{1,6},{1,7},{1,7},{1,7},{1,7},{1,5},{1,5},{1,6},{1,6},{1,7},{1,7},{1,7},{1,7}},
+ {{0,19},{0,19},{0,19},{0,19},{0,19},{0,19},{0,19},{0,19},{0,5},{0,5},{0,5},{0,5},{0,5},{0,5},{0,5},{0,5},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8}},
+ {{0,19},{0,19},{0,19},{0,19},{0,23},{0,23},{0,23},{0,23},{0,5},{0,5},{0,5},{0,5},{0,6},{0,6},{0,6},{0,6},{1,8},{1,8},{1,8},{1,8},{1,9},{1,9},{1,9},{1,9},{1,8},{1,8},{1,8},{1,8},{1,9},{1,9},{1,9},{1,9}},
+ {{0,0},{5,18},{0,0},{5,18},{3,18},{3,20},{3,18},{3,20},{2,18},{2,21},{2,18},{2,21},{2,19},{2,23},{2,19},{2,23},{1,18},{1,24},{1,18},{1,24},{1,20},{1,25},{1,20},{1,25},{1,19},{1,26},{1,19},{1,26},{1,5},{1,10},{1,5},{1,10}},
+ {{0,0},{5,18},{4,18},{4,19},{3,18},{3,20},{3,19},{3,5},{2,18},{2,21},{2,20},{2,22},{2,19},{2,23},{2,5},{2,6},{1,18},{1,24},{1,21},{1,27},{1,20},{1,25},{1,22},{1,28},{1,19},{1,26},{1,23},{1,29},{1,5},{1,10},{1,6},{1,11}},
+ {{2,19},{2,23},{2,5},{2,6},{3,8},{3,9},{3,8},{3,9},{2,19},{2,23},{2,5},{2,6},{3,8},{3,9},{3,8},{3,9},{1,5},{1,10},{1,6},{1,11},{1,7},{1,12},{1,7},{1,12},{1,5},{1,10},{1,6},{1,11},{1,7},{1,12},{1,7},{1,12}},
+ {{0,19},{0,19},{0,26},{0,26},{0,19},{0,19},{0,26},{0,26},{0,5},{0,5},{0,10},{0,10},{0,5},{0,5},{0,10},{0,10},{1,8},{1,8},{1,13},{1,13},{1,8},{1,8},{1,13},{1,13},{1,8},{1,8},{1,13},{1,13},{1,8},{1,8},{1,13},{1,13}},
+ {{0,19},{0,19},{0,26},{0,26},{0,23},{0,23},{0,29},{0,29},{0,5},{0,5},{0,10},{0,10},{0,6},{0,6},{0,11},{0,11},{1,8},{1,8},{1,13},{1,13},{1,9},{1,9},{1,14},{1,14},{1,8},{1,8},{1,13},{1,13},{1,9},{1,9},{1,14},{1,14}},
+ {{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{2,5},{2,6},{2,7},{2,7},{2,5},{2,6},{2,7},{2,7},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30},{1,6},{1,11},{1,15},{1,15},{1,6},{1,11},{1,15},{1,15}},
+ {{2,5},{2,6},{2,7},{2,7},{3,8},{3,9},{3,2},{3,2},{2,5},{2,6},{2,7},{2,7},{3,8},{3,9},{3,2},{3,2},{1,6},{1,11},{1,15},{1,15},{1,7},{1,12},{1,16},{1,16},{1,6},{1,11},{1,15},{1,15},{1,7},{1,12},{1,16},{1,16}},
+ {{0,6},{0,6},{0,11},{0,11},{0,15},{0,15},{0,15},{0,15},{0,6},{0,6},{0,11},{0,11},{0,15},{0,15},{0,15},{0,15},{1,9},{1,9},{1,14},{1,14},{1,17},{1,17},{1,17},{1,17},{1,9},{1,9},{1,14},{1,14},{1,17},{1,17},{1,17},{1,17}},
+ {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18}},
+ {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{2,18},{2,18},{2,18},{2,18},{2,18},{2,18},{2,18},{2,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,19},{1,19},{1,19},{1,19},{1,19},{1,19},{1,19},{1,19}},
+ {{0,0},{0,0},{0,0},{0,0},{3,18},{3,18},{3,18},{3,18},{0,0},{0,0},{0,0},{0,0},{3,18},{3,18},{3,18},{3,18},{1,18},{1,18},{1,18},{1,18},{1,20},{1,20},{1,20},{1,20},{1,18},{1,18},{1,18},{1,18},{1,20},{1,20},{1,20},{1,20}},
+ {{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21}},
+ {{0,0},{0,0},{4,18},{4,18},{3,18},{3,18},{3,19},{3,19},{0,0},{0,0},{4,18},{4,18},{3,18},{3,18},{3,19},{3,19},{1,18},{1,18},{1,21},{1,21},{1,20},{1,20},{1,22},{1,22},{1,18},{1,18},{1,21},{1,21},{1,20},{1,20},{1,22},{1,22}},
+ {{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{2,18},{2,18},{2,20},{2,20},{2,18},{2,18},{2,20},{2,20},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21},{1,19},{1,19},{1,23},{1,23},{1,19},{1,19},{1,23},{1,23}},
+ {{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24}},
+ {{0,0},{5,18},{0,0},{5,18},{3,18},{3,20},{3,18},{3,20},{0,0},{5,18},{0,0},{5,18},{3,18},{3,20},{3,18},{3,20},{1,18},{1,24},{1,18},{1,24},{1,20},{1,25},{1,20},{1,25},{1,18},{1,24},{1,18},{1,24},{1,20},{1,25},{1,20},{1,25}},
+ {{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{2,18},{2,21},{2,18},{2,21},{2,18},{2,21},{2,18},{2,21},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,19},{1,26},{1,19},{1,26},{1,19},{1,26},{1,19},{1,26}},
+ {{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27}},
+ {{0,0},{5,18},{4,18},{4,19},{3,18},{3,20},{3,19},{3,5},{0,0},{5,18},{4,18},{4,19},{3,18},{3,20},{3,19},{3,5},{1,18},{1,24},{1,21},{1,27},{1,20},{1,25},{1,22},{1,28},{1,18},{1,24},{1,21},{1,27},{1,20},{1,25},{1,22},{1,28}},
+ {{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{2,18},{2,21},{2,20},{2,22},{2,18},{2,21},{2,20},{2,22},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27},{1,19},{1,26},{1,23},{1,29},{1,19},{1,26},{1,23},{1,29}},
+ {{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30}}
+ };
+ static constexpr uint8_t edits[31][5] = {
+ {3,3,3,3,3},
+ {0,1,2,3,3},
+ {1,1,2,3,3},
+ {1,1,2,2,3},
+ {1,1,1,2,3},
+ {2,2,2,3,3},
+ {2,2,2,2,3},
+ {2,2,1,2,3},
+ {1,2,3,3,3},
+ {1,2,2,3,3},
+ {2,2,2,3,2},
+ {2,2,2,2,2},
+ {2,2,1,2,2},
+ {1,2,3,2,3},
+ {1,2,2,2,3},
+ {2,2,2,1,2},
+ {2,2,1,1,2},
+ {1,2,1,2,3},
+ {2,3,3,3,3},
+ {2,2,3,3,3},
+ {2,3,2,3,3},
+ {2,3,3,2,3},
+ {2,3,2,2,3},
+ {2,2,3,2,3},
+ {2,3,3,3,2},
+ {2,3,2,3,2},
+ {2,2,3,3,2},
+ {2,3,3,2,2},
+ {2,3,2,2,2},
+ {2,2,3,2,2},
+ {2,3,2,1,2}
+ };
+};
+// end of auto-generated code for N = 2
diff --git a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h
index dc511a42d04..45c52c84bad 100644
--- a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h
+++ b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h
@@ -44,7 +44,6 @@ public:
};
private:
- const void *_tfa;
const std::vector<Lookup> _lookup;
const bool _is_cased;
diff --git a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp
index 0693343007e..d3cc9002e2b 100644
--- a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp
+++ b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp
@@ -196,7 +196,7 @@ struct StateRepo {
return refs[idx]->first;
}
};
-StateRepo::~StateRepo() = default;
+[[maybe_unused]] StateRepo::~StateRepo() = default;
template <uint8_t N>
std::vector<bool> expand_bits(uint32_t value) {
@@ -212,7 +212,7 @@ std::vector<bool> expand_bits(uint32_t value) {
}
template <uint8_t N>
-[[maybe_unused]] StateRepo make_state_repo() {
+StateRepo make_state_repo() {
StateRepo repo;
for (uint32_t idx = 0; idx < repo.size(); ++idx) {
const State &state = repo.idx_to_state(idx);
@@ -225,16 +225,20 @@ template <uint8_t N>
return repo;
}
-// this is the result of our efforts
+struct Transition {
+ uint8_t step;
+ uint8_t state;
+ constexpr Transition() noexcept : step(0), state(0) {}
+ constexpr Transition(uint8_t di, uint8_t ns) noexcept : step(di), state(ns) {}
+};
+
+template <uint8_t N> struct InlineTfa;
+#include "inline_tfa.hpp"
+
template <uint8_t N>
struct Tfa {
- struct Entry {
- uint8_t step;
- uint8_t state;
- };
-
// what happens when following a transition from a state?
- std::array<std::array<Entry,num_transitions<N>()>,num_states<N>()> table;
+ std::array<std::array<Transition,num_transitions<N>()>,num_states<N>()> table;
// how many edits did we use to match the target word?
std::array<std::array<uint8_t,window_size<N>()>,num_states<N>()> edits;
@@ -267,12 +271,6 @@ std::unique_ptr<Tfa<N>> make_tfa() {
return tfa;
}
-template <uint8_t N>
-const Tfa<N> *get_tfa() {
- static std::unique_ptr<Tfa<N>> tfa = make_tfa<N>();
- return tfa.get();
-}
-
template <typename T>
vespalib::string format_vector(const std::vector<T> &vector, bool compact = false) {
vespalib::string str = compact ? "" : "[";
@@ -296,56 +294,55 @@ struct TableMatcher {
// needed by dfa matcher concept (should use std::declval instead)
constexpr S() noexcept : index(0), state(0) {}
constexpr S(uint32_t i, uint32_t s) noexcept : index(i), state(s) {}
- S next(const Tfa<N> *tfa, uint32_t bits) noexcept {
- auto entry = tfa->table[state][bits];
+ S next(uint32_t bits) noexcept {
+ const auto &entry = InlineTfa<N>::table[state][bits];
return S(index + entry.step, entry.state);
}
- constexpr bool is_valid_edge(const Tfa<N> *tfa, uint32_t bits) const noexcept {
- return tfa->table[state][bits].state != 0;
+ constexpr bool is_valid_edge(uint32_t bits) const noexcept {
+ return InlineTfa<N>::table[state][bits].state != 0;
}
};
using StateType = S;
using StateParamType = StateType;
using EdgeType = uint32_t;
- const Tfa<N> *tfa;
const TableDfa<N>::Lookup *lookup;
const uint32_t end;
const bool cased;
- TableMatcher(const Tfa<N> *tfa_in, const TableDfa<N>::Lookup *lookup_in, uint32_t end_in, bool cased_in)
- noexcept : tfa(tfa_in), lookup(lookup_in), end(end_in), cased(cased_in) {}
+ TableMatcher(const TableDfa<N>::Lookup *lookup_in, uint32_t end_in, bool cased_in)
+ noexcept : lookup(lookup_in), end(end_in), cased(cased_in) {}
bool is_cased() const noexcept { return cased; }
static constexpr S start() noexcept { return S(0, 1); }
uint8_t match_edit_distance(S s) const noexcept {
uint32_t leap = (end - s.index);
- return (leap < window_size<N>()) ? tfa->edits[s.state][leap] : N + 1;
+ return (leap < window_size<N>()) ? InlineTfa<N>::edits[s.state][leap] : N + 1;
}
bool is_match(S s) const noexcept { return match_edit_distance(s) <= N; }
static constexpr bool can_match(S s) noexcept { return (s.state != 0); }
static constexpr bool valid_state(S s) noexcept { return (s.state != 0); }
- S match_wildcard(S s) const noexcept { return s.next(tfa, 0); }
+ S match_wildcard(S s) const noexcept { return s.next(0); }
S match_input(S s, uint32_t c) const noexcept {
const auto *slice = lookup[s.index].list.data();
for (size_t i = 0; i < window_size<N>() && slice[i].input != 0; ++i) {
if (slice[i].input == c) {
- return s.next(tfa, slice[i].match);
+ return s.next(slice[i].match);
}
}
return match_wildcard(s);
}
bool has_higher_out_edge(S s, uint32_t c) const noexcept {
- if (s.is_valid_edge(tfa, 0)) {
+ if (s.is_valid_edge(0)) {
return true;
}
const auto *slice = lookup[s.index].list.data();
for (size_t i = 0; i < window_size<N>() && slice[i].input > c; ++i) {
- if (s.is_valid_edge(tfa, slice[i].match)) {
+ if (s.is_valid_edge(slice[i].match)) {
return true;
}
}
@@ -356,7 +353,7 @@ struct TableMatcher {
const auto *slice = lookup[s.index].list.data();
for (size_t i = 0; i < window_size<N>() && slice[i].input >= c; ++i) {
if (slice[i].input == c) {
- return s.is_valid_edge(tfa, slice[i].match);
+ return s.is_valid_edge(slice[i].match);
}
}
return false;
@@ -366,7 +363,7 @@ struct TableMatcher {
const auto *slice = lookup[s.index].list.data();
size_t i = window_size<N>();
while (i-- > 0) {
- if (slice[i].input > c && s.is_valid_edge(tfa, slice[i].match)) {
+ if (slice[i].input > c && s.is_valid_edge(slice[i].match)) {
return slice[i].input;
}
}
@@ -377,7 +374,7 @@ struct TableMatcher {
const auto *slice = lookup[s.index].list.data();
size_t i = window_size<N>();
while (i-- > 0) {
- if (slice[i].input != 0 && s.is_valid_edge(tfa, slice[i].match)) {
+ if (slice[i].input != 0 && s.is_valid_edge(slice[i].match)) {
return slice[i].input;
}
}
@@ -436,8 +433,7 @@ TableDfa<N>::make_lookup(const std::vector<uint32_t> &str)->std::vector<Lookup>
template <uint8_t N>
TableDfa<N>::TableDfa(std::vector<uint32_t> str, bool is_cased)
- : _tfa(get_tfa<N>()),
- _lookup(make_lookup(str)),
+ : _lookup(make_lookup(str)),
_is_cased(is_cased)
{
}
@@ -449,7 +445,7 @@ template <uint8_t N>
LevenshteinDfa::MatchResult
TableDfa<N>::match(std::string_view u8str) const
{
- TableMatcher matcher(static_cast<const Tfa<N>*>(_tfa), _lookup.data(), _lookup.size() - 1, _is_cased);
+ TableMatcher<N> matcher(_lookup.data(), _lookup.size() - 1, _is_cased);
return MatchAlgorithm<N>::match(matcher, u8str);
}
@@ -457,7 +453,7 @@ template <uint8_t N>
LevenshteinDfa::MatchResult
TableDfa<N>::match(std::string_view u8str, std::string& successor_out) const
{
- TableMatcher matcher(static_cast<const Tfa<N>*>(_tfa), _lookup.data(), _lookup.size() - 1, _is_cased);
+ TableMatcher<N> matcher(_lookup.data(), _lookup.size() - 1, _is_cased);
return MatchAlgorithm<N>::match(matcher, u8str, successor_out);
}
@@ -465,7 +461,7 @@ template <uint8_t N>
LevenshteinDfa::MatchResult
TableDfa<N>::match(std::string_view u8str, std::vector<uint32_t>& successor_out) const
{
- TableMatcher matcher(static_cast<const Tfa<N>*>(_tfa), _lookup.data(), _lookup.size() - 1, _is_cased);
+ TableMatcher<N> matcher(_lookup.data(), _lookup.size() - 1, _is_cased);
return MatchAlgorithm<N>::match(matcher, u8str, successor_out);
}
@@ -486,10 +482,10 @@ TableDfa<N>::dump_as_graphviz(std::ostream &os) const
if (_lookup[i].list[j].input != 0) {
std::string as_utf8;
append_utf32_char(as_utf8, _lookup[i].list[j].input);
- os << " x" << i << " -> " << _lookup[i].list[j].match << " [label=\"" << as_utf8 << "\"];\n";
+ os << " x" << i << " -> " << format_vector(expand_bits<N>(_lookup[i].list[j].match), true) << " [label=\"" << as_utf8 << "\"];\n";
}
}
- os << " x" << i << " -> 0 [label=\"*\"];\n";
+ os << " x" << i << " -> " << format_vector(expand_bits<N>(0), true) << " [label=\"*\"];\n";
}
os << "}\n";
}