23 files changed, 363 insertions, 81 deletions
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
index a94ca9d890e..87b771af8e6 100644
--- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
+++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
@@ -26,6 +26,7 @@ using LookupKey = IDirectPostingStore::LookupKey;
 struct IntegerKey : public LookupKey {
     int64_t _value;
     IntegerKey(int64_t value_in) : _value(value_in) {}
+    IntegerKey(const vespalib::string&) : _value() { abort(); }
     vespalib::stringref asString() const override { abort(); }
     bool asInteger(int64_t& value) const override { value = _value; return true; }
 };
@@ -33,6 +34,7 @@ struct IntegerKey : public LookupKey {
 struct StringKey : public LookupKey {
     vespalib::string _value;
     StringKey(int64_t value_in) : _value(std::to_string(value_in)) {}
+    StringKey(const vespalib::string& value_in) : _value(value_in) {}
     vespalib::stringref asString() const override { return _value; }
     bool asInteger(int64_t&) const override { abort(); }
 };
@@ -78,6 +80,10 @@ populate_attribute(AttributeType& attr, const std::vector<DataType>& values)
     for (auto docid : range(300, 128)) {
         attr.update(docid, values[3]);
     }
+    if (values.size() > 4) {
+        attr.update(40, values[4]);
+        attr.update(41, values[5]);
+    }
     attr.commit(true);
 }
 
@@ -93,7 +99,7 @@ make_attribute(CollectionType col_type, BasicType type, bool field_is_filter)
     auto attr = test::AttributeBuilder(field_name, cfg).docs(num_docs).get();
     if (type == BasicType::STRING) {
         populate_attribute<StringAttribute, vespalib::string>(dynamic_cast<StringAttribute&>(*attr),
-                                                              {"1", "3", "100", "300"});
+                                                              {"1", "3", "100", "300", "foo", "Foo"});
     } else {
         populate_attribute<IntegerAttribute, int64_t>(dynamic_cast<IntegerAttribute&>(*attr),
                                                       {1, 3, 100, 300});
@@ -223,15 +229,16 @@ public:
             tfmd.tagAsNotNeeded();
         }
     }
-    template <typename BlueprintType>
-    void add_term_helper(BlueprintType& b, int64_t term_value) {
+    template <typename BlueprintType, typename TermType>
+    void add_term_helper(BlueprintType& b, TermType term_value) {
         if (integer_type) {
             b.addTerm(IntegerKey(term_value), 1, estimate);
         } else {
             b.addTerm(StringKey(term_value), 1, estimate);
         }
     }
-    void add_term(int64_t term_value) {
+    template <typename TermType>
+    void add_term(TermType term_value) {
         if (single_type) {
             if (in_operator) {
                 add_term_helper(dynamic_cast<SingleInBlueprintType&>(*blueprint), term_value);
@@ -246,8 +253,18 @@ public:
             }
         }
     }
-    std::unique_ptr<SearchIterator> create_leaf_search() const {
-        return blueprint->createLeafSearch(tfmda, true);
+    void add_terms(const std::vector<int64_t>& term_values) {
+        for (auto value : term_values) {
+            add_term(value);
+        }
+    }
+    void add_terms(const std::vector<vespalib::string>& term_values) {
+        for (auto value : term_values) {
+            add_term(value);
+        }
+    }
+    std::unique_ptr<SearchIterator> create_leaf_search(bool strict = true) const {
+        return blueprint->createLeafSearch(tfmda, strict);
     }
     vespalib::string resolve_iterator_with_unpack() const {
         if (in_operator) {
@@ -262,7 +279,7 @@ expect_hits(const Docids& exp_docids, SearchIterator& itr)
 {
     SimpleResult exp(exp_docids);
     SimpleResult act;
-    act.search(itr);
+    act.search(itr, doc_id_limit);
     EXPECT_EQ(exp, act);
 }
 
@@ -294,8 +311,7 @@ INSTANTIATE_TEST_SUITE_P(DefaultInstantiation,
 
 TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_none_filter_field) {
     setup(false, true);
-    add_term(1);
-    add_term(3);
+    add_terms({1, 3});
     auto itr = create_leaf_search();
     EXPECT_THAT(itr->asString(), StartsWith(resolve_iterator_with_unpack()));
     expect_hits({10, 30, 31}, *itr);
@@ -307,8 +323,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_used_instead_of_btree_iterators_
     if (!in_operator) {
         return;
     }
-    add_term(1);
-    add_term(100);
+    add_terms({1, 100});
     auto itr = create_leaf_search();
     expect_or_iterator(*itr, 2);
     expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
@@ -322,8 +337,7 @@ TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_instead_of_bitvectors_
     if (in_operator) {
         return;
     }
-    add_term(1);
-    add_term(100);
+    add_terms({1, 100});
     auto itr = create_leaf_search();
     EXPECT_THAT(itr->asString(), StartsWith(iterator_unpack_docid_and_weights));
     expect_hits(concat({10}, range(100, 128)), *itr);
@@ -332,10 +346,7 @@ TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_instead_of_bitvectors_
 TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_filter_field)
 {
     setup(true, true);
-    add_term(1);
-    add_term(3);
-    add_term(100);
-    add_term(300);
+    add_terms({1, 3, 100, 300});
     auto itr = create_leaf_search();
     expect_or_iterator(*itr, 3);
     expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
@@ -347,8 +358,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_fil
 TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field)
 {
     setup(true, true);
-    add_term(100);
-    add_term(300);
+    add_terms({100, 300});
     auto itr = create_leaf_search();
     expect_or_iterator(*itr, 2);
     expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
@@ -359,8 +369,7 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field)
 TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_filter_field_when_ranking_not_needed)
 {
     setup(true, false);
-    add_term(1);
-    add_term(3);
+    add_terms({1, 3});
     auto itr = create_leaf_search();
     EXPECT_THAT(itr->asString(), StartsWith(iterator_unpack_none));
     expect_hits({10, 30, 31}, *itr);
@@ -369,10 +378,7 @@ TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_filter_field_when_
 TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_filter_field_when_ranking_not_needed)
 {
     setup(true, false);
-    add_term(1);
-    add_term(3);
-    add_term(100);
-    add_term(300);
+    add_terms({1, 3, 100, 300});
     auto itr = create_leaf_search();
     expect_or_iterator(*itr, 3);
     expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
@@ -384,8 +390,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_fil
 TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_when_ranking_not_needed)
 {
     setup(true, false);
-    add_term(100);
-    add_term(300);
+    add_terms({100, 300});
     auto itr = create_leaf_search();
     expect_or_iterator(*itr, 2);
     expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
@@ -393,4 +398,41 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_when_
     expect_hits(concat(range(100, 128), range(300, 128)), *itr);
 }
 
+TEST_P(DirectMultiTermBlueprintTest, hash_filter_used_for_non_strict_iterator_with_10_or_more_terms)
+{
+    setup(true, true);
+    if (!single_type) {
+        return;
+    }
+    add_terms({1, 3, 3, 3, 3, 3, 3, 3, 3, 3});
+    auto itr = create_leaf_search(false);
+    EXPECT_THAT(itr->asString(), StartsWith("search::attribute::MultiTermHashFilter"));
+    expect_hits({10, 30, 31}, *itr);
+}
+
+TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_non_strict_iterator_with_9_or_less_terms)
+{
+    setup(true, true);
+    if (!single_type) {
+        return;
+    }
+    add_terms({1, 3, 3, 3, 3, 3, 3, 3, 3});
+    auto itr = create_leaf_search(false);
+    EXPECT_THAT(itr->asString(), StartsWith(iterator_unpack_docid));
+    expect_hits({10, 30, 31}, *itr);
+}
+
+TEST_P(DirectMultiTermBlueprintTest, hash_filter_with_string_folding_used_for_non_strict_iterator)
+{
+    setup(true, true);
+    if (!single_type || integer_type) {
+        return;
+    }
+    // "foo" matches documents with "foo" (40) and "Foo" (41).
+    add_terms({"foo", "3", "3", "3", "3", "3", "3", "3", "3", "3"});
+    auto itr = create_leaf_search(false);
+    EXPECT_THAT(itr->asString(), StartsWith("search::attribute::MultiTermHashFilter"));
+    expect_hits({30, 31, 40, 41}, *itr);
+}
+
 GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp
index fe6149e6fba..97b3d88c25e 100644
--- a/searchlib/src/tests/query/streaming_query_test.cpp
+++ b/searchlib/src/tests/query/streaming_query_test.cpp
@@ -23,9 +23,9 @@ using TermType = QueryTerm::Type;
 using search::fef::SimpleTermData;
 using search::fef::MatchData;
 
-void assertHit(const Hit & h, size_t expWordpos, size_t expContext, int32_t weight) {
+void assertHit(const Hit & h, size_t expWordpos, size_t exp_field_id, int32_t weight) {
     EXPECT_EQ(h.wordpos(), expWordpos);
-    EXPECT_EQ(h.context(), expContext);
+    EXPECT_EQ(h.field_id(), exp_field_id);
     EXPECT_EQ(h.weight(), weight);
 }
 
@@ -479,11 +479,11 @@ TEST(StreamingQueryTest, test_phrase_evaluate)
     p->evaluateHits(hits);
     ASSERT_EQ(3u, hits.size());
     EXPECT_EQ(hits[0].wordpos(), 2u);
-    EXPECT_EQ(hits[0].context(), 0u);
+    EXPECT_EQ(hits[0].field_id(), 0u);
     EXPECT_EQ(hits[1].wordpos(), 6u);
-    EXPECT_EQ(hits[1].context(), 1u);
+    EXPECT_EQ(hits[1].field_id(), 1u);
     EXPECT_EQ(hits[2].wordpos(), 2u);
-    EXPECT_EQ(hits[2].context(), 3u);
+    EXPECT_EQ(hits[2].field_id(), 3u);
     ASSERT_EQ(4u, p->getFieldInfoSize());
     EXPECT_EQ(p->getFieldInfo(0).getHitOffset(), 0u);
     EXPECT_EQ(p->getFieldInfo(0).getHitCount(),  1u);
@@ -847,22 +847,22 @@ TEST(StreamingQueryTest, test_same_element_evaluate)
     sameElem->evaluateHits(hits);
     EXPECT_EQ(4u, hits.size());
     EXPECT_EQ(0u, hits[0].wordpos());
-    EXPECT_EQ(2u, hits[0].context());
+    EXPECT_EQ(2u, hits[0].field_id());
     EXPECT_EQ(0u, hits[0].elemId());
     EXPECT_EQ(130,  hits[0].weight());
 
     EXPECT_EQ(0u, hits[1].wordpos());
-    EXPECT_EQ(2u, hits[1].context());
+    EXPECT_EQ(2u, hits[1].field_id());
     EXPECT_EQ(2u, hits[1].elemId());
     EXPECT_EQ(140,  hits[1].weight());
 
     EXPECT_EQ(0u, hits[2].wordpos());
-    EXPECT_EQ(2u, hits[2].context());
+    EXPECT_EQ(2u, hits[2].field_id());
     EXPECT_EQ(4u, hits[2].elemId());
     EXPECT_EQ(150,  hits[2].weight());
 
     EXPECT_EQ(0u, hits[3].wordpos());
-    EXPECT_EQ(2u, hits[3].context());
+    EXPECT_EQ(2u, hits[3].field_id());
     EXPECT_EQ(5u, hits[3].elemId());
     EXPECT_EQ(160,  hits[3].weight());
     EXPECT_TRUE(sameElem->evaluate());
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index 01148c11c9c..d0353ab8947 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -6,12 +6,12 @@
 #include <vespa/searchlib/common/bitvector.h>
 #include <vespa/searchlib/fef/matchdatalayout.h>
 #include <vespa/searchlib/query/query_term_ucs4.h>
+#include <vespa/searchlib/queryeval/filter_wrapper.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
 #include <vespa/searchlib/queryeval/weighted_set_term_search.h>
 #include <vespa/vespalib/objects/visit.h>
-#include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/stllike/hash_map.hpp>
-#include <vespa/searchlib/queryeval/filter_wrapper.h>
-#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/vespalib/util/stringfmt.h>
 
 
 namespace search {
@@ -38,6 +38,7 @@ class StringEnumWrapper : public AttrWrapper
 {
 public:
     using TokenT = uint32_t;
+    static constexpr bool unpack_weights = true;
     explicit StringEnumWrapper(const IAttributeVector & attr)
         : AttrWrapper(attr) {}
     auto mapToken(const ISearchContext &context) const {
@@ -52,6 +53,7 @@ class IntegerWrapper : public AttrWrapper
 {
 public:
     using TokenT = uint64_t;
+    static constexpr bool unpack_weights = true;
     explicit IntegerWrapper(const IAttributeVector & attr) : AttrWrapper(attr) {}
     std::vector<int64_t> mapToken(const ISearchContext &context) const {
         std::vector<int64_t> result;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index 42651854599..485427391ad 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -35,6 +35,8 @@ private:
     using IteratorType = typename PostingStoreType::IteratorType;
     using IteratorWeights = std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>>;
 
+    bool use_hash_filter(bool strict) const;
+
     IteratorWeights create_iterators(std::vector<IteratorType>& btree_iterators,
                                      std::vector<std::unique_ptr<queryeval::SearchIterator>>& bitvectors,
                                      bool use_bitvector_when_available,
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index 6fc8ac63026..0a3b24142a5 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -8,6 +8,7 @@
 #include <vespa/searchlib/queryeval/emptysearch.h>
 #include <vespa/searchlib/queryeval/filter_wrapper.h>
 #include <vespa/searchlib/queryeval/orsearch.h>
+#include <cmath>
 #include <memory>
 #include <type_traits>
 
@@ -38,6 +39,43 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::DirectMultiTermBlueprint
 template <typename PostingStoreType, typename SearchType>
 DirectMultiTermBlueprint<PostingStoreType, SearchType>::~DirectMultiTermBlueprint() = default;
 
+
+template <typename PostingStoreType, typename SearchType>
+bool
+DirectMultiTermBlueprint<PostingStoreType, SearchType>::use_hash_filter(bool strict) const
+{
+    if (strict || _iattr.hasMultiValue()) {
+        return false;
+    }
+    // The following very simplified formula was created after analysing performance of the IN operator
+    // on a 10M document corpus using a machine with an Intel Xeon 2.5 GHz CPU with 48 cores and 256 Gb of memory:
+    // https://github.com/vespa-engine/system-test/tree/master/tests/performance/in_operator
+    //
+    // The following 25 test cases were used to calculate the cost of using btree iterators (strict):
+    // op_hits_ratios = [5, 10, 50, 100, 200] * tokens_in_op = [1, 5, 10, 100, 1000]
+    // For each case we calculate the latency difference against the case with tokens_in_op=1 and the same op_hits_ratio.
+    // This indicates the extra time used to produce the same number of hits when having multiple tokens in the operator.
+    // The latency diff is divided with the number of hits produced and convert to nanoseconds:
+    //   10M * (op_hits_ratio / 1000) * 1000 * 1000
+    // Based on the numbers we can approximate the cost per document (in nanoseconds) as:
+    //   8.0 (ns) * log2(tokens_in_op).
+    // NOTE: This is very simplified. Ideally we should also take into consideration the hit estimate of this blueprint,
+    //       as the cost per document will be lower when producing few hits.
+    //
+    // In addition, the following 28 test cases were used to calculate the cost of using the hash filter (non-strict).
+    // filter_hits_ratios = [1, 5, 10, 50, 100, 150, 200] x op_hits_ratios = [200] x tokens_in_op = [5, 10, 100, 1000]
+    // The code was altered to always using the hash filter for non-strict iterators.
+    // For each case we calculate the latency difference against a case from above with tokens_in_op=1 that produce a similar number of hits.
+    // This indicates the extra time used to produce the same number of hits when using the hash filter.
+    // The latency diff is divided with the number of hits the test filter produces and convert to nanoseconds:
+    //   10M * (filter_hits_ratio / 1000) * 1000 * 1000
+    // Based on the numbers we calculate the average cost per document (in nanoseconds) as 26.0 ns.
+
+    float hash_filter_cost_per_doc_ns = 26.0;
+    float btree_iterator_cost_per_doc_ns = 8.0 * std::log2(_terms.size());
+    return hash_filter_cost_per_doc_ns < btree_iterator_cost_per_doc_ns;
+}
+
 template <typename PostingStoreType, typename SearchType>
 typename DirectMultiTermBlueprint<PostingStoreType, SearchType>::IteratorWeights
 DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_iterators(std::vector<IteratorType>& btree_iterators,
@@ -96,14 +134,21 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(con
     if (_terms.empty()) {
         return std::make_unique<queryeval::EmptySearch>();
     }
+    auto& tfmd = *tfmda[0];
+    bool field_is_filter = getState().fields()[0].isFilter();
+    if constexpr (SearchType::supports_hash_filter) {
+        if (use_hash_filter(strict)) {
+            return SearchType::create_hash_filter(tfmd, (filter_search || field_is_filter),
+                                                  _weights, _terms,
+                                                  _iattr, _attr, _dictionary_snapshot);
+        }
+    }
     std::vector<IteratorType> btree_iterators;
     std::vector<queryeval::SearchIterator::UP> bitvectors;
     const size_t num_children = _terms.size();
     btree_iterators.reserve(num_children);
-    auto& tfmd = *tfmda[0];
     bool use_bit_vector_when_available = filter_search || !_attr.has_always_btree_iterator();
     auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, tfmd, strict);
-    bool field_is_filter = getState().fields()[0].isFilter();
     if constexpr (!SearchType::require_btree_iterators) {
         auto multi_term = !btree_iterators.empty() ?
                 SearchType::create(tfmd, (filter_search || field_is_filter), std::move(weights), std::move(btree_iterators))
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.h b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.h
index 9c3ea258fdc..43500366f21 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.h
@@ -23,6 +23,7 @@ class MultiTermHashFilter final : public queryeval::SearchIterator
 public:
     using Key = typename WrapperType::TokenT;
     using TokenMap = vespalib::hash_map<Key, int32_t, vespalib::hash<Key>, std::equal_to<Key>, vespalib::hashtable_base::and_modulator>;
+    static constexpr bool unpack_weights = WrapperType::unpack_weights;
 
 private:
     fef::TermFieldMatchData& _tfmd;
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.hpp b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.hpp
index 96d5b3ac1f3..e67bda147d7 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_term_hash_filter.hpp
@@ -42,10 +42,14 @@ template <typename WrapperType>
 void
 MultiTermHashFilter<WrapperType>::doUnpack(uint32_t docId)
 {
-    _tfmd.reset(docId);
-    fef::TermFieldMatchDataPosition pos;
-    pos.setElementWeight(_weight);
-    _tfmd.appendPosition(pos);
+    if constexpr (unpack_weights) {
+        _tfmd.reset(docId);
+        fef::TermFieldMatchDataPosition pos;
+        pos.setElementWeight(_weight);
+        _tfmd.appendPosition(pos);
+    } else {
+        _tfmd.resetOnlyDocId(docId);
+    }
 }
     
 }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
index 05a75f4662e..76119a6d58f 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
@@ -2,6 +2,7 @@
 vespa_add_library(searchlib_query_streaming OBJECT
     SOURCES
     dot_product_term.cpp
+    fuzzy_term.cpp
     in_term.cpp
     multi_term.cpp
     nearest_neighbor_query_node.cpp
diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp
index 1871bda564d..b3bfbb0e86b 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp
@@ -24,7 +24,7 @@ DotProductTerm::build_scores(Scores& scores) const
     for (const auto& term : _terms) {
         auto& hl = term->evaluateHits(hl_store);
         for (auto& hit : hl) {
-            scores[hit.context()] += ((int64_t)term->weight().percent()) * hit.weight();
+            scores[hit.field_id()] += ((int64_t)term->weight().percent()) * hit.weight();
         }
     }
 }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp
new file mode 100644
index 00000000000..f33fe44369a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp
@@ -0,0 +1,43 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "fuzzy_term.h"
+
+namespace search::streaming {
+
+namespace {
+
+constexpr bool normalizing_implies_cased(Normalizing norm) noexcept {
+    return (norm == Normalizing::NONE);
+}
+
+}
+
+FuzzyTerm::FuzzyTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term,
+                     const string& index, Type type, Normalizing normalizing,
+                     uint8_t max_edits, uint32_t prefix_size)
+    : QueryTerm(std::move(result_base), term, index, type, normalizing),
+      _dfa_matcher(),
+      _fallback_matcher()
+{
+    setFuzzyMaxEditDistance(max_edits);
+    setFuzzyPrefixLength(prefix_size);
+
+    std::string_view term_view(term.data(), term.size());
+    const bool cased = normalizing_implies_cased(normalizing);
+    if (attribute::DfaFuzzyMatcher::supports_max_edits(max_edits)) {
+        _dfa_matcher = std::make_unique<attribute::DfaFuzzyMatcher>(term_view, max_edits, prefix_size, cased);
+    } else {
+        _fallback_matcher = std::make_unique<vespalib::FuzzyMatcher>(term_view, max_edits, prefix_size, cased);
+    }
+}
+
+FuzzyTerm::~FuzzyTerm() = default;
+
+bool FuzzyTerm::is_match(std::string_view term) const {
+    if (_dfa_matcher) {
+        return _dfa_matcher->is_match(term);
+    } else {
+        return _fallback_matcher->isMatch(term);
+    }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h
new file mode 100644
index 00000000000..c6c88b18969
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h
@@ -0,0 +1,34 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "queryterm.h"
+#include <vespa/searchlib/attribute/dfa_fuzzy_matcher.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
+#include <memory>
+#include <string_view>
+
+namespace search::streaming {
+
+/**
+ * Query term that matches candidate field terms that are within a query-specified
+ * maximum number of edits (add, delete or substitute a character), with case
+ * sensitivity controlled by the provided Normalizing mode.
+ *
+ * Optionally, terms may be prefixed-locked, which enforces field terms to have a
+ * particular prefix and where edits are only counted for the remaining term suffix.
+ */
+class FuzzyTerm : public QueryTerm {
+    std::unique_ptr<attribute::DfaFuzzyMatcher> _dfa_matcher;
+    std::unique_ptr<vespalib::FuzzyMatcher>     _fallback_matcher;
+public:
+    FuzzyTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term,
+              const string& index, Type type, Normalizing normalizing,
+              uint8_t max_edits, uint32_t prefix_size);
+    ~FuzzyTerm() override;
+
+    [[nodiscard]] FuzzyTerm* as_fuzzy_term() noexcept override { return this; }
+
+    [[nodiscard]] bool is_match(std::string_view term) const;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.h b/searchlib/src/vespa/searchlib/query/streaming/hit.h
index a798d293491..1e467a895ac 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/hit.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit.h
@@ -9,19 +9,17 @@ namespace search::streaming {
 class Hit
 {
 public:
-    Hit(uint32_t pos_, uint32_t context_, uint32_t elemId_, int32_t weight_) noexcept
-        : _position(pos_ | (context_<<24)),
+    Hit(uint32_t pos_, uint32_t field_id_, uint32_t elemId_, int32_t weight_) noexcept
+        : _position(pos_ | (field_id_<<24)),
           _elemId(elemId_),
           _weight(weight_)
     { }
     int32_t weight() const { return _weight; }
     uint32_t pos()     const { return _position; }
     uint32_t wordpos() const { return _position & 0xffffff; }
-    uint32_t context() const { return _position >> 24; }
+    uint32_t field_id() const noexcept { return _position >> 24; }
     uint32_t elemId() const { return _elemId; }
-    bool operator < (const Hit & b) const { return cmp(b) < 0; }
 private:
-    int cmp(const Hit & b) const { return _position - b._position; }
     uint32_t _position;
     uint32_t _elemId;
     int32_t  _weight;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp
index 3e75f4a5114..c164db69ba1 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp
@@ -29,9 +29,9 @@ InTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_
     for (const auto& term : _terms) {
         auto& hl = term->evaluateHits(hl_store);
         for (auto& hit : hl) {
-            if (!prev_field_id.has_value() || prev_field_id.value() != hit.context()) {
-                prev_field_id = hit.context();
-                matching_field_ids.insert(hit.context());
+            if (!prev_field_id.has_value() || prev_field_id.value() != hit.field_id()) {
+                prev_field_id = hit.field_id();
+                matching_field_ids.insert(hit.field_id());
             }
         }
     }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp
index dd34b9b7e73..f5a09892551 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp
@@ -51,11 +51,4 @@ MultiTerm::evaluate() const
     return false;
 }
 
-const HitList&
-MultiTerm::evaluateHits(HitList& hl) const
-{
-    hl.clear();
-    return hl;
-}
-
 }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h
index 3bb69e29693..6f795c31356 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h
@@ -32,7 +32,6 @@ public:
     MultiTerm* as_multi_term() noexcept override { return this; }
     void reset() override;
     bool evaluate() const override;
-    const HitList& evaluateHits(HitList& hl) const override;
     virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) = 0;
     const std::vector<std::unique_ptr<QueryTerm>>& get_terms() const noexcept { return _terms; }
 };
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
index ca742aabe26..618922eced9 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
@@ -208,7 +208,7 @@ SameElementQueryNode::evaluateHits(HitList & hl) const
             currMatchCount++;
             if ((currMatchCount+1) == numFields) {
                 Hit h = nextHL[indexVector[currMatchCount]];
-                hl.emplace_back(0, h.context(), h.elemId(), h.weight());
+                hl.emplace_back(0, h.field_id(), h.elemId(), h.weight());
                 currMatchCount = 0;
                 indexVector[0]++;
             }
@@ -260,26 +260,26 @@ PhraseQueryNode::evaluateHits(HitList & hl) const
         const auto & currHit = curr->evaluateHits(tmpHL)[currIndex];
         size_t firstPosition = currHit.pos();
         uint32_t currElemId = currHit.elemId();
-        uint32_t currContext = currHit.context();
+        uint32_t curr_field_id = currHit.field_id();
 
         const HitList & nextHL = next->evaluateHits(tmpHL);
 
         int diff(0);
         size_t nextIndexMax = nextHL.size();
         while ((nextIndex < nextIndexMax) &&
-              ((nextHL[nextIndex].context() < currContext) ||
-               ((nextHL[nextIndex].context() == currContext) && (nextHL[nextIndex].elemId() <= currElemId))) &&
+              ((nextHL[nextIndex].field_id() < curr_field_id) ||
+               ((nextHL[nextIndex].field_id() == curr_field_id) && (nextHL[nextIndex].elemId() <= currElemId))) &&
              ((diff = nextHL[nextIndex].pos()-firstPosition) < 1))
         {
             nextIndex++;
         }
-        if ((diff == 1) && (nextHL[nextIndex].context() == currContext) && (nextHL[nextIndex].elemId() == currElemId)) {
+        if ((diff == 1) && (nextHL[nextIndex].field_id() == curr_field_id) && (nextHL[nextIndex].elemId() == currElemId)) {
             currPhraseLen++;
             if ((currPhraseLen+1) == fullPhraseLen) {
                 Hit h = nextHL[indexVector[currPhraseLen]];
                 hl.push_back(h);
-                const QueryTerm::FieldInfo & fi = next->getFieldInfo(h.context());
-                updateFieldInfo(h.context(), hl.size() - 1, fi.getFieldLength());
+                const QueryTerm::FieldInfo & fi = next->getFieldInfo(h.field_id());
+                updateFieldInfo(h.field_id(), hl.size() - 1, fi.getFieldLength());
                 currPhraseLen = 0;
                 indexVector[0]++;
             }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 2ee515f062a..e71529a8aca 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -1,7 +1,8 @@
 // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
-#include "query.h"
+#include "fuzzy_term.h"
 #include "nearest_neighbor_query_node.h"
+#include "query.h"
 #include "regexp_term.h"
 #include <vespa/searchlib/parsequery/stackdumpiterator.h>
 #include <vespa/searchlib/query/streaming/dot_product_term.h>
@@ -147,17 +148,16 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
         } else {
             Normalizing normalize_mode = factory.normalizing_mode(ssIndex);
             std::unique_ptr<QueryTerm> qt;
-            if (sTerm != TermType::REGEXP) {
-                qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode);
-            } else {
+            if (sTerm == TermType::REGEXP) {
                 qt = std::make_unique<RegexpTerm>(factory.create(), ssTerm, ssIndex, TermType::REGEXP, normalize_mode);
+            } else if (sTerm == TermType::FUZZYTERM) {
+                qt = std::make_unique<FuzzyTerm>(factory.create(), ssTerm, ssIndex, TermType::FUZZYTERM, normalize_mode,
+                                                 queryRep.getFuzzyMaxEditDistance(), queryRep.getFuzzyPrefixLength());
+            } else [[likely]] {
+                qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode);
             }
             qt->setWeight(queryRep.GetWeight());
             qt->setUniqueId(queryRep.getUniqueId());
-            if (qt->isFuzzy()) {
-                qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance());
-                qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength());
-            }
             if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.allow_float_terms_rewrite(ssIndex)) {
                 auto phrase = std::make_unique<PhraseQueryNode>();
                 auto dotPos = ssTerm.find('.');
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index 3e05d381ee2..fb002ec1867 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -185,4 +185,10 @@ QueryTerm::as_regexp_term() noexcept
     return nullptr;
 }
 
+FuzzyTerm*
+QueryTerm::as_fuzzy_term() noexcept
+{
+    return nullptr;
+}
+
 }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index cd2bdd7eaec..b4dfa98ebe5 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -11,6 +11,7 @@
 
 namespace search::streaming {
 
+class FuzzyTerm;
 class NearestNeighborQueryNode;
 class MultiTerm;
 class RegexpTerm;
@@ -64,7 +65,7 @@ public:
     QueryTerm & operator = (QueryTerm &&) = delete;
     ~QueryTerm() override;
     bool evaluate() const override;
-    const HitList & evaluateHits(HitList & hl) const override;
+    const HitList & evaluateHits(HitList & hl) const final override;
     void reset() override;
     void getLeaves(QueryTermList & tl) override;
     void getLeaves(ConstQueryTermList & tl) const override;
@@ -95,6 +96,7 @@ public:
     virtual NearestNeighborQueryNode* as_nearest_neighbor_query_node() noexcept;
     virtual MultiTerm* as_multi_term() noexcept;
     virtual RegexpTerm* as_regexp_term() noexcept;
+    virtual FuzzyTerm* as_fuzzy_term() noexcept;
 protected:
     using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>;
     string                       _index;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp
index 90d0be5d43c..d2d706eef3d 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/weighted_set_term.cpp
@@ -25,7 +25,7 @@ WeightedSetTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchDat
     for (const auto& term : _terms) {
         auto& hl = term->evaluateHits(hl_store);
         for (auto& hit : hl) {
-            scores[hit.context()].emplace_back(term->weight().percent());
+            scores[hit.field_id()].emplace_back(term->weight().percent());
         }
     }
     auto num_fields = td.numFields();
diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
index e49fcbcb5bc..c74c2d4e9a7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
@@ -27,6 +27,7 @@ protected:
 public:
     static constexpr bool filter_search = false;
     static constexpr bool require_btree_iterators = true;
+    static constexpr bool supports_hash_filter = false;
 
     // TODO: use MultiSearch::Children to pass ownership
     static SearchIterator::UP create(const std::vector<SearchIterator*> &children,
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
index 0c57b21aba6..0be014474d0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
@@ -1,10 +1,13 @@
 // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "weighted_set_term_search.h"
+#include <vespa/searchcommon/attribute/i_search_context.h>
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/attribute/i_direct_posting_store.h>
+#include <vespa/searchlib/attribute/multi_term_hash_filter.hpp>
 #include <vespa/searchlib/common/bitvector.h>
-#include <vespa/searchlib/attribute/multi_term_or_filter_search.h>
 #include <vespa/vespalib/objects/visit.h>
-#include <vespa/searchcommon/attribute/i_search_context.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
 
 #include "iterator_pack.h"
 #include "blueprint.h"
@@ -237,4 +240,98 @@ WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd,
     return create_helper_resolve_pack<DocidWithWeightIterator, DocidWithWeightIteratorPack>(tmd, is_filter_search, std::move(weights), std::move(iterators));
 }
 
+namespace {
+
+class HashFilterWrapper {
+protected:
+    const attribute::IAttributeVector& _attr;
+public:
+    HashFilterWrapper(const attribute::IAttributeVector& attr) : _attr(attr) {}
+};
+
+template <bool unpack_weights_t>
+class StringHashFilterWrapper : public HashFilterWrapper {
+public:
+    using TokenT = attribute::IAttributeVector::EnumHandle;
+    static constexpr bool unpack_weights = unpack_weights_t;
+    StringHashFilterWrapper(const attribute::IAttributeVector& attr)
+        : HashFilterWrapper(attr)
+    {}
+    auto mapToken(const IDirectPostingStore::LookupResult& term, const IDirectPostingStore& store, vespalib::datastore::EntryRef dict_snapshot) const {
+        std::vector<TokenT> result;
+        store.collect_folded(term.enum_idx, dict_snapshot, [&](vespalib::datastore::EntryRef ref) { result.emplace_back(ref.ref()); });
+        return result;
+    }
+    TokenT getToken(uint32_t docid) const {
+        return _attr.getEnum(docid);
+    }
+};
+
+template <bool unpack_weights_t>
+class IntegerHashFilterWrapper : public HashFilterWrapper {
+public:
+    using TokenT = attribute::IAttributeVector::largeint_t;
+    static constexpr bool unpack_weights = unpack_weights_t;
+    IntegerHashFilterWrapper(const attribute::IAttributeVector& attr)
+        : HashFilterWrapper(attr)
+    {}
+    auto mapToken(const IDirectPostingStore::LookupResult& term,
+                  const IDirectPostingStore& store,
+                  vespalib::datastore::EntryRef) const {
+        std::vector<TokenT> result;
+        result.emplace_back(store.get_integer_value(term.enum_idx));
+        return result;
+    }
+    TokenT getToken(uint32_t docid) const {
+        return _attr.getInt(docid);
+    }
+};
+
+template <typename WrapperType>
+SearchIterator::UP
+create_hash_filter_helper(fef::TermFieldMatchData& tfmd,
+                          const std::vector<int32_t>& weights,
+                          const std::vector<IDirectPostingStore::LookupResult>& terms,
+                          const attribute::IAttributeVector& attr,
+                          const IDirectPostingStore& posting_store,
+                          vespalib::datastore::EntryRef dict_snapshot)
+{
+    using FilterType = attribute::MultiTermHashFilter<WrapperType>;
+    typename FilterType::TokenMap tokens;
+    WrapperType wrapper(attr);
+    for (size_t i = 0; i < terms.size(); ++i) {
+        for (auto token : wrapper.mapToken(terms[i], posting_store, dict_snapshot)) {
+            tokens[token] = weights[i];
+        }
+    }
+    return std::make_unique<FilterType>(tfmd, wrapper, std::move(tokens));
+}
+
+}
+
+SearchIterator::UP
+WeightedSetTermSearch::create_hash_filter(search::fef::TermFieldMatchData& tmd,
+                                          bool is_filter_search,
+                                          const std::vector<int32_t>& weights,
+                                          const std::vector<IDirectPostingStore::LookupResult>& terms,
+                                          const attribute::IAttributeVector& attr,
+                                          const IDirectPostingStore& posting_store,
+                                          vespalib::datastore::EntryRef dict_snapshot)
+{
+    if (attr.isStringType()) {
+        if (is_filter_search) {
+            return create_hash_filter_helper<StringHashFilterWrapper<false>>(tmd, weights, terms, attr, posting_store, dict_snapshot);
+        } else {
+            return create_hash_filter_helper<StringHashFilterWrapper<true>>(tmd, weights, terms, attr, posting_store, dict_snapshot);
+        }
+    } else {
+        assert(attr.isIntegerType());
+        if (is_filter_search) {
+            return create_hash_filter_helper<IntegerHashFilterWrapper<false>>(tmd, weights, terms, attr, posting_store, dict_snapshot);
+        } else {
+            return create_hash_filter_helper<IntegerHashFilterWrapper<true>>(tmd, weights, terms, attr, posting_store, dict_snapshot);
+        }
+    }
+}
+
 }
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
index 7e47928fb90..d078fd5babc 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
@@ -11,6 +11,8 @@
 #include <variant>
 #include <vector>
 
+namespace search::attribute { class IAttributeVector; }
+
 namespace search::fef { class TermFieldMatchData; }
 
 namespace search::queryeval {
@@ -30,6 +32,8 @@ public:
     static constexpr bool filter_search = false;
     // Whether this iterator requires btree iterators for all tokens/terms used by the operator.
     static constexpr bool require_btree_iterators = false;
+    // Whether this supports creating a hash filter iterator;
+    static constexpr bool supports_hash_filter = true;
 
     // TODO: pass ownership with unique_ptr
     static SearchIterator::UP create(const std::vector<SearchIterator *> &children,
@@ -48,6 +52,14 @@ public:
                                      std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
                                      std::vector<DocidWithWeightIterator> &&iterators);
 
+    static SearchIterator::UP create_hash_filter(search::fef::TermFieldMatchData& tmd,
+                                                 bool is_filter_search,
+                                                 const std::vector<int32_t>& weights,
+                                                 const std::vector<IDirectPostingStore::LookupResult>& terms,
+                                                 const attribute::IAttributeVector& attr,
+                                                 const IDirectPostingStore& posting_store,
+                                                 vespalib::datastore::EntryRef dictionary_snapshot);
+
     // used during docsum fetching to identify matching elements
     // initRange must be called before use.
     // doSeek/doUnpack must not be called.