From eacc1e28f0b231a22d8c999eac5f9e79b88ecdb7 Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Mon, 25 Sep 2023 16:39:53 +0000 Subject: use inline pre-generated tables --- .../src/tests/fuzzy/table_dfa/table_dfa_test.cpp | 109 +++++++++++++++++++-- vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp | 90 +++++++++++++++++ vespalib/src/vespa/vespalib/fuzzy/table_dfa.h | 1 - vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp | 70 +++++++------ 4 files changed, 226 insertions(+), 44 deletions(-) create mode 100644 vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp diff --git a/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp b/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp index 7782a39c3c7..4ff48348790 100644 --- a/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp +++ b/vespalib/src/tests/fuzzy/table_dfa/table_dfa_test.cpp @@ -172,20 +172,17 @@ TEST(TableDfaTest, format_bits) { } template -void list_states(bool count_only = false) { +void list_states() { auto repo = make_state_repo(); EXPECT_EQ(num_states(), repo.size()); fprintf(stderr, "max_edits: %u, number of states: %zu\n", N, repo.size()); - if (!count_only) { - for (uint32_t i = 0; i < repo.size(); ++i) { - fprintf(stderr, " state %u: %s\n", i, repo.idx_to_state(i).to_string().c_str()); - } + for (uint32_t i = 0; i < repo.size(); ++i) { + fprintf(stderr, " state %u: %s\n", i, repo.idx_to_state(i).to_string().c_str()); } } TEST(TableDfaTest, list_states_for_max_edits_1) { list_states<1>(); } TEST(TableDfaTest, list_states_for_max_edits_2) { list_states<2>(); } -TEST(TableDfaTest, count_states_for_max_edits_3) { list_states<3>(true); } template void list_edits() { @@ -295,4 +292,104 @@ TEST(TableDfaTest, graphviz_for_food_with_max_edits_1) { fprintf(stderr, "%s", out.str().c_str()); } +template +void verify_inline_tfa() { + auto tfa = make_tfa(); + fprintf(stderr, "verifying TFA for N = %u (byte size: %zu)\n", N, sizeof(*tfa)); + ASSERT_EQ(tfa->table.size(), num_states()); + ASSERT_EQ(tfa->edits.size(), num_states()); + for (size_t state = 0; state < num_states(); ++state) { + ASSERT_EQ(tfa->table[state].size(), num_transitions()); + for (size_t transition = 0; transition < num_transitions(); ++transition) { + EXPECT_EQ(tfa->table[state][transition].step, InlineTfa::table[state][transition].step); + EXPECT_EQ(tfa->table[state][transition].state, InlineTfa::table[state][transition].state); + } + ASSERT_EQ(tfa->edits[state].size(), window_size()); + for (size_t offset = 0; offset < window_size(); ++offset) { + EXPECT_EQ(tfa->edits[state][offset], InlineTfa::edits[state][offset]); + } + } +} + +TEST(TableDfaTest, verify_inline_tfa_with_max_edits_1) { + verify_inline_tfa<1>(); +} + +TEST(TableDfaTest, verify_inline_tfa_with_max_edits_2) { + verify_inline_tfa<2>(); +} + +template +void dump_tfa_as_code() { + auto tfa = make_tfa(); + fprintf(stderr, "// start of auto-generated code for N = %u\n", N); + fprintf(stderr, "template <> struct InlineTfa<%u> {\n", N); + fprintf(stderr, " static constexpr Transition table[%zu][%zu] = {\n", num_states(), num_transitions()); + for (size_t state = 0; state < num_states(); ++state) { + fprintf(stderr, " {"); + for (size_t transition = 0; transition < num_transitions(); ++transition) { + if (transition > 0) { + fprintf(stderr, ","); + } + fprintf(stderr, "{%u,%u}", tfa->table[state][transition].step, tfa->table[state][transition].state); + } + fprintf(stderr, "}%s\n", ((state + 1) < num_states()) ? "," : ""); + } + fprintf(stderr, " };\n"); + fprintf(stderr, " static constexpr uint8_t edits[%zu][%zu] = {\n", num_states(), window_size()); + for (size_t state = 0; state < num_states(); ++state) { + fprintf(stderr, " {"); + for (size_t offset = 0; offset < window_size(); ++offset) { + if (offset > 0) { + fprintf(stderr, ","); + } + fprintf(stderr, "%u", tfa->edits[state][offset]); + } + fprintf(stderr, "}%s\n", ((state + 1) < num_states()) ? "," : ""); + } + fprintf(stderr, " };\n"); + fprintf(stderr, "};\n"); + fprintf(stderr, "// end of auto-generated code for N = %u\n", N); +} + +TEST(TableDfaTest, dump_tfa_with_max_edits_1_as_code) { + dump_tfa_as_code<1>(); +} + +TEST(TableDfaTest, dump_tfa_with_max_edits_2_as_code) { + dump_tfa_as_code<2>(); +} + +template +void dump_tfa_graph() { + auto repo = make_state_repo(); + fprintf(stderr, "digraph tfa {\n"); + for (uint32_t idx = 0; idx < repo.size(); ++idx) { + fprintf(stderr, " %u [label=\"%s\"];\n", idx, + repo.idx_to_state(idx).to_string().c_str()); + } + // omit transitions from the failure state to itself + for (uint32_t idx = 1; idx < repo.size(); ++idx) { + const State &state = repo.idx_to_state(idx); + for (uint32_t i = 0; i < num_transitions(); ++i) { + auto bits = expand_bits(i); + State new_state = state.next(bits); + uint32_t step = new_state.normalize(); + uint32_t new_idx = repo.state_to_idx(new_state); + ASSERT_LT(new_idx, repo.size()); + if (bits[0] && idx == new_idx && step == 1) { + // omit simple transitions to yourself + } else { + fprintf(stderr, " %u -> %u [label=\"%s,%u\"];\n", idx, new_idx, + format_vector(bits, true).c_str(), step); + } + } + } + fprintf(stderr, "}\n"); +} + +TEST(TableDfaTest, graphviz_for_tfa_with_max_edits_1) { + dump_tfa_graph<1>(); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp new file mode 100644 index 00000000000..fbec451bca0 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/inline_tfa.hpp @@ -0,0 +1,90 @@ +// start of auto-generated code for N = 1 +template <> struct InlineTfa<1> { + static constexpr Transition table[6][8] = { + {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{0,2},{0,2},{0,3},{0,3},{1,1},{1,1},{1,1},{1,1}}, + {{0,0},{0,0},{2,4},{2,4},{1,4},{1,4},{1,2},{1,2}}, + {{0,0},{3,4},{2,4},{2,2},{1,4},{1,5},{1,2},{1,3}}, + {{0,0},{0,0},{0,0},{0,0},{1,4},{1,4},{1,4},{1,4}}, + {{0,0},{3,4},{0,0},{3,4},{1,4},{1,5},{1,4},{1,5}} + }; + static constexpr uint8_t edits[6][3] = { + {2,2,2}, + {0,1,2}, + {1,1,2}, + {1,1,1}, + {1,2,2}, + {1,2,1} + }; +}; +// end of auto-generated code for N = 1 +// start of auto-generated code for N = 2 +template <> struct InlineTfa<2> { + static constexpr Transition table[31][32] = { + {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{0,2},{0,2},{0,2},{0,2},{0,3},{0,3},{0,3},{0,3},{0,4},{0,4},{0,4},{0,4},{0,4},{0,4},{0,4},{0,4},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1},{1,1}}, + {{0,5},{0,5},{0,5},{0,5},{0,6},{0,6},{0,6},{0,6},{0,7},{0,7},{0,7},{0,7},{0,7},{0,7},{0,7},{0,7},{1,8},{1,8},{1,8},{1,8},{1,9},{1,9},{1,9},{1,9},{1,2},{1,2},{1,2},{1,2},{1,2},{1,2},{1,2},{1,2}}, + {{0,5},{0,5},{0,10},{0,10},{0,6},{0,6},{0,11},{0,11},{0,7},{0,7},{0,12},{0,12},{0,7},{0,7},{0,12},{0,12},{1,8},{1,8},{1,13},{1,13},{1,9},{1,9},{1,14},{1,14},{1,2},{1,2},{1,3},{1,3},{1,2},{1,2},{1,3},{1,3}}, + {{0,6},{0,6},{0,11},{0,11},{0,15},{0,15},{0,15},{0,15},{0,7},{0,7},{0,12},{0,12},{0,16},{0,16},{0,16},{0,16},{1,9},{1,9},{1,14},{1,14},{1,17},{1,17},{1,17},{1,17},{1,2},{1,2},{1,3},{1,3},{1,4},{1,4},{1,4},{1,4}}, + {{0,0},{0,0},{0,0},{0,0},{3,18},{3,18},{3,18},{3,18},{2,18},{2,18},{2,18},{2,18},{2,19},{2,19},{2,19},{2,19},{1,18},{1,18},{1,18},{1,18},{1,20},{1,20},{1,20},{1,20},{1,19},{1,19},{1,19},{1,19},{1,5},{1,5},{1,5},{1,5}}, + {{0,0},{0,0},{4,18},{4,18},{3,18},{3,18},{3,19},{3,19},{2,18},{2,18},{2,20},{2,20},{2,19},{2,19},{2,5},{2,5},{1,18},{1,18},{1,21},{1,21},{1,20},{1,20},{1,22},{1,22},{1,19},{1,19},{1,23},{1,23},{1,5},{1,5},{1,6},{1,6}}, + {{2,19},{2,19},{2,5},{2,5},{3,8},{3,8},{3,8},{3,8},{2,19},{2,19},{2,5},{2,5},{3,8},{3,8},{3,8},{3,8},{1,5},{1,5},{1,6},{1,6},{1,7},{1,7},{1,7},{1,7},{1,5},{1,5},{1,6},{1,6},{1,7},{1,7},{1,7},{1,7}}, + {{0,19},{0,19},{0,19},{0,19},{0,19},{0,19},{0,19},{0,19},{0,5},{0,5},{0,5},{0,5},{0,5},{0,5},{0,5},{0,5},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8},{1,8}}, + {{0,19},{0,19},{0,19},{0,19},{0,23},{0,23},{0,23},{0,23},{0,5},{0,5},{0,5},{0,5},{0,6},{0,6},{0,6},{0,6},{1,8},{1,8},{1,8},{1,8},{1,9},{1,9},{1,9},{1,9},{1,8},{1,8},{1,8},{1,8},{1,9},{1,9},{1,9},{1,9}}, + {{0,0},{5,18},{0,0},{5,18},{3,18},{3,20},{3,18},{3,20},{2,18},{2,21},{2,18},{2,21},{2,19},{2,23},{2,19},{2,23},{1,18},{1,24},{1,18},{1,24},{1,20},{1,25},{1,20},{1,25},{1,19},{1,26},{1,19},{1,26},{1,5},{1,10},{1,5},{1,10}}, + {{0,0},{5,18},{4,18},{4,19},{3,18},{3,20},{3,19},{3,5},{2,18},{2,21},{2,20},{2,22},{2,19},{2,23},{2,5},{2,6},{1,18},{1,24},{1,21},{1,27},{1,20},{1,25},{1,22},{1,28},{1,19},{1,26},{1,23},{1,29},{1,5},{1,10},{1,6},{1,11}}, + {{2,19},{2,23},{2,5},{2,6},{3,8},{3,9},{3,8},{3,9},{2,19},{2,23},{2,5},{2,6},{3,8},{3,9},{3,8},{3,9},{1,5},{1,10},{1,6},{1,11},{1,7},{1,12},{1,7},{1,12},{1,5},{1,10},{1,6},{1,11},{1,7},{1,12},{1,7},{1,12}}, + {{0,19},{0,19},{0,26},{0,26},{0,19},{0,19},{0,26},{0,26},{0,5},{0,5},{0,10},{0,10},{0,5},{0,5},{0,10},{0,10},{1,8},{1,8},{1,13},{1,13},{1,8},{1,8},{1,13},{1,13},{1,8},{1,8},{1,13},{1,13},{1,8},{1,8},{1,13},{1,13}}, + {{0,19},{0,19},{0,26},{0,26},{0,23},{0,23},{0,29},{0,29},{0,5},{0,5},{0,10},{0,10},{0,6},{0,6},{0,11},{0,11},{1,8},{1,8},{1,13},{1,13},{1,9},{1,9},{1,14},{1,14},{1,8},{1,8},{1,13},{1,13},{1,9},{1,9},{1,14},{1,14}}, + {{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{2,5},{2,6},{2,7},{2,7},{2,5},{2,6},{2,7},{2,7},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30},{1,6},{1,11},{1,15},{1,15},{1,6},{1,11},{1,15},{1,15}}, + {{2,5},{2,6},{2,7},{2,7},{3,8},{3,9},{3,2},{3,2},{2,5},{2,6},{2,7},{2,7},{3,8},{3,9},{3,2},{3,2},{1,6},{1,11},{1,15},{1,15},{1,7},{1,12},{1,16},{1,16},{1,6},{1,11},{1,15},{1,15},{1,7},{1,12},{1,16},{1,16}}, + {{0,6},{0,6},{0,11},{0,11},{0,15},{0,15},{0,15},{0,15},{0,6},{0,6},{0,11},{0,11},{0,15},{0,15},{0,15},{0,15},{1,9},{1,9},{1,14},{1,14},{1,17},{1,17},{1,17},{1,17},{1,9},{1,9},{1,14},{1,14},{1,17},{1,17},{1,17},{1,17}}, + {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18}}, + {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{2,18},{2,18},{2,18},{2,18},{2,18},{2,18},{2,18},{2,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,18},{1,19},{1,19},{1,19},{1,19},{1,19},{1,19},{1,19},{1,19}}, + {{0,0},{0,0},{0,0},{0,0},{3,18},{3,18},{3,18},{3,18},{0,0},{0,0},{0,0},{0,0},{3,18},{3,18},{3,18},{3,18},{1,18},{1,18},{1,18},{1,18},{1,20},{1,20},{1,20},{1,20},{1,18},{1,18},{1,18},{1,18},{1,20},{1,20},{1,20},{1,20}}, + {{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21}}, + {{0,0},{0,0},{4,18},{4,18},{3,18},{3,18},{3,19},{3,19},{0,0},{0,0},{4,18},{4,18},{3,18},{3,18},{3,19},{3,19},{1,18},{1,18},{1,21},{1,21},{1,20},{1,20},{1,22},{1,22},{1,18},{1,18},{1,21},{1,21},{1,20},{1,20},{1,22},{1,22}}, + {{0,0},{0,0},{4,18},{4,18},{0,0},{0,0},{4,18},{4,18},{2,18},{2,18},{2,20},{2,20},{2,18},{2,18},{2,20},{2,20},{1,18},{1,18},{1,21},{1,21},{1,18},{1,18},{1,21},{1,21},{1,19},{1,19},{1,23},{1,23},{1,19},{1,19},{1,23},{1,23}}, + {{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24}}, + {{0,0},{5,18},{0,0},{5,18},{3,18},{3,20},{3,18},{3,20},{0,0},{5,18},{0,0},{5,18},{3,18},{3,20},{3,18},{3,20},{1,18},{1,24},{1,18},{1,24},{1,20},{1,25},{1,20},{1,25},{1,18},{1,24},{1,18},{1,24},{1,20},{1,25},{1,20},{1,25}}, + {{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{0,0},{5,18},{2,18},{2,21},{2,18},{2,21},{2,18},{2,21},{2,18},{2,21},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,18},{1,24},{1,19},{1,26},{1,19},{1,26},{1,19},{1,26},{1,19},{1,26}}, + {{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27}}, + {{0,0},{5,18},{4,18},{4,19},{3,18},{3,20},{3,19},{3,5},{0,0},{5,18},{4,18},{4,19},{3,18},{3,20},{3,19},{3,5},{1,18},{1,24},{1,21},{1,27},{1,20},{1,25},{1,22},{1,28},{1,18},{1,24},{1,21},{1,27},{1,20},{1,25},{1,22},{1,28}}, + {{0,0},{5,18},{4,18},{4,19},{0,0},{5,18},{4,18},{4,19},{2,18},{2,21},{2,20},{2,22},{2,18},{2,21},{2,20},{2,22},{1,18},{1,24},{1,21},{1,27},{1,18},{1,24},{1,21},{1,27},{1,19},{1,26},{1,23},{1,29},{1,19},{1,26},{1,23},{1,29}}, + {{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{3,19},{3,5},{4,8},{4,8},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30},{1,22},{1,28},{1,30},{1,30}} + }; + static constexpr uint8_t edits[31][5] = { + {3,3,3,3,3}, + {0,1,2,3,3}, + {1,1,2,3,3}, + {1,1,2,2,3}, + {1,1,1,2,3}, + {2,2,2,3,3}, + {2,2,2,2,3}, + {2,2,1,2,3}, + {1,2,3,3,3}, + {1,2,2,3,3}, + {2,2,2,3,2}, + {2,2,2,2,2}, + {2,2,1,2,2}, + {1,2,3,2,3}, + {1,2,2,2,3}, + {2,2,2,1,2}, + {2,2,1,1,2}, + {1,2,1,2,3}, + {2,3,3,3,3}, + {2,2,3,3,3}, + {2,3,2,3,3}, + {2,3,3,2,3}, + {2,3,2,2,3}, + {2,2,3,2,3}, + {2,3,3,3,2}, + {2,3,2,3,2}, + {2,2,3,3,2}, + {2,3,3,2,2}, + {2,3,2,2,2}, + {2,2,3,2,2}, + {2,3,2,1,2} + }; +}; +// end of auto-generated code for N = 2 diff --git a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h index dc511a42d04..45c52c84bad 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h +++ b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.h @@ -44,7 +44,6 @@ public: }; private: - const void *_tfa; const std::vector _lookup; const bool _is_cased; diff --git a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp index 0693343007e..d3cc9002e2b 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp +++ b/vespalib/src/vespa/vespalib/fuzzy/table_dfa.hpp @@ -196,7 +196,7 @@ struct StateRepo { return refs[idx]->first; } }; -StateRepo::~StateRepo() = default; +[[maybe_unused]] StateRepo::~StateRepo() = default; template std::vector expand_bits(uint32_t value) { @@ -212,7 +212,7 @@ std::vector expand_bits(uint32_t value) { } template -[[maybe_unused]] StateRepo make_state_repo() { +StateRepo make_state_repo() { StateRepo repo; for (uint32_t idx = 0; idx < repo.size(); ++idx) { const State &state = repo.idx_to_state(idx); @@ -225,16 +225,20 @@ template return repo; } -// this is the result of our efforts +struct Transition { + uint8_t step; + uint8_t state; + constexpr Transition() noexcept : step(0), state(0) {} + constexpr Transition(uint8_t di, uint8_t ns) noexcept : step(di), state(ns) {} +}; + +template struct InlineTfa; +#include "inline_tfa.hpp" + template struct Tfa { - struct Entry { - uint8_t step; - uint8_t state; - }; - // what happens when following a transition from a state? - std::array()>,num_states()> table; + std::array()>,num_states()> table; // how many edits did we use to match the target word? std::array()>,num_states()> edits; @@ -267,12 +271,6 @@ std::unique_ptr> make_tfa() { return tfa; } -template -const Tfa *get_tfa() { - static std::unique_ptr> tfa = make_tfa(); - return tfa.get(); -} - template vespalib::string format_vector(const std::vector &vector, bool compact = false) { vespalib::string str = compact ? "" : "["; @@ -296,56 +294,55 @@ struct TableMatcher { // needed by dfa matcher concept (should use std::declval instead) constexpr S() noexcept : index(0), state(0) {} constexpr S(uint32_t i, uint32_t s) noexcept : index(i), state(s) {} - S next(const Tfa *tfa, uint32_t bits) noexcept { - auto entry = tfa->table[state][bits]; + S next(uint32_t bits) noexcept { + const auto &entry = InlineTfa::table[state][bits]; return S(index + entry.step, entry.state); } - constexpr bool is_valid_edge(const Tfa *tfa, uint32_t bits) const noexcept { - return tfa->table[state][bits].state != 0; + constexpr bool is_valid_edge(uint32_t bits) const noexcept { + return InlineTfa::table[state][bits].state != 0; } }; using StateType = S; using StateParamType = StateType; using EdgeType = uint32_t; - const Tfa *tfa; const TableDfa::Lookup *lookup; const uint32_t end; const bool cased; - TableMatcher(const Tfa *tfa_in, const TableDfa::Lookup *lookup_in, uint32_t end_in, bool cased_in) - noexcept : tfa(tfa_in), lookup(lookup_in), end(end_in), cased(cased_in) {} + TableMatcher(const TableDfa::Lookup *lookup_in, uint32_t end_in, bool cased_in) + noexcept : lookup(lookup_in), end(end_in), cased(cased_in) {} bool is_cased() const noexcept { return cased; } static constexpr S start() noexcept { return S(0, 1); } uint8_t match_edit_distance(S s) const noexcept { uint32_t leap = (end - s.index); - return (leap < window_size()) ? tfa->edits[s.state][leap] : N + 1; + return (leap < window_size()) ? InlineTfa::edits[s.state][leap] : N + 1; } bool is_match(S s) const noexcept { return match_edit_distance(s) <= N; } static constexpr bool can_match(S s) noexcept { return (s.state != 0); } static constexpr bool valid_state(S s) noexcept { return (s.state != 0); } - S match_wildcard(S s) const noexcept { return s.next(tfa, 0); } + S match_wildcard(S s) const noexcept { return s.next(0); } S match_input(S s, uint32_t c) const noexcept { const auto *slice = lookup[s.index].list.data(); for (size_t i = 0; i < window_size() && slice[i].input != 0; ++i) { if (slice[i].input == c) { - return s.next(tfa, slice[i].match); + return s.next(slice[i].match); } } return match_wildcard(s); } bool has_higher_out_edge(S s, uint32_t c) const noexcept { - if (s.is_valid_edge(tfa, 0)) { + if (s.is_valid_edge(0)) { return true; } const auto *slice = lookup[s.index].list.data(); for (size_t i = 0; i < window_size() && slice[i].input > c; ++i) { - if (s.is_valid_edge(tfa, slice[i].match)) { + if (s.is_valid_edge(slice[i].match)) { return true; } } @@ -356,7 +353,7 @@ struct TableMatcher { const auto *slice = lookup[s.index].list.data(); for (size_t i = 0; i < window_size() && slice[i].input >= c; ++i) { if (slice[i].input == c) { - return s.is_valid_edge(tfa, slice[i].match); + return s.is_valid_edge(slice[i].match); } } return false; @@ -366,7 +363,7 @@ struct TableMatcher { const auto *slice = lookup[s.index].list.data(); size_t i = window_size(); while (i-- > 0) { - if (slice[i].input > c && s.is_valid_edge(tfa, slice[i].match)) { + if (slice[i].input > c && s.is_valid_edge(slice[i].match)) { return slice[i].input; } } @@ -377,7 +374,7 @@ struct TableMatcher { const auto *slice = lookup[s.index].list.data(); size_t i = window_size(); while (i-- > 0) { - if (slice[i].input != 0 && s.is_valid_edge(tfa, slice[i].match)) { + if (slice[i].input != 0 && s.is_valid_edge(slice[i].match)) { return slice[i].input; } } @@ -436,8 +433,7 @@ TableDfa::make_lookup(const std::vector &str)->std::vector template TableDfa::TableDfa(std::vector str, bool is_cased) - : _tfa(get_tfa()), - _lookup(make_lookup(str)), + : _lookup(make_lookup(str)), _is_cased(is_cased) { } @@ -449,7 +445,7 @@ template LevenshteinDfa::MatchResult TableDfa::match(std::string_view u8str) const { - TableMatcher matcher(static_cast*>(_tfa), _lookup.data(), _lookup.size() - 1, _is_cased); + TableMatcher matcher(_lookup.data(), _lookup.size() - 1, _is_cased); return MatchAlgorithm::match(matcher, u8str); } @@ -457,7 +453,7 @@ template LevenshteinDfa::MatchResult TableDfa::match(std::string_view u8str, std::string& successor_out) const { - TableMatcher matcher(static_cast*>(_tfa), _lookup.data(), _lookup.size() - 1, _is_cased); + TableMatcher matcher(_lookup.data(), _lookup.size() - 1, _is_cased); return MatchAlgorithm::match(matcher, u8str, successor_out); } @@ -465,7 +461,7 @@ template LevenshteinDfa::MatchResult TableDfa::match(std::string_view u8str, std::vector& successor_out) const { - TableMatcher matcher(static_cast*>(_tfa), _lookup.data(), _lookup.size() - 1, _is_cased); + TableMatcher matcher(_lookup.data(), _lookup.size() - 1, _is_cased); return MatchAlgorithm::match(matcher, u8str, successor_out); } @@ -486,10 +482,10 @@ TableDfa::dump_as_graphviz(std::ostream &os) const if (_lookup[i].list[j].input != 0) { std::string as_utf8; append_utf32_char(as_utf8, _lookup[i].list[j].input); - os << " x" << i << " -> " << _lookup[i].list[j].match << " [label=\"" << as_utf8 << "\"];\n"; + os << " x" << i << " -> " << format_vector(expand_bits(_lookup[i].list[j].match), true) << " [label=\"" << as_utf8 << "\"];\n"; } } - os << " x" << i << " -> 0 [label=\"*\"];\n"; + os << " x" << i << " -> " << format_vector(expand_bits(0), true) << " [label=\"*\"];\n"; } os << "}\n"; } -- cgit v1.2.3