diff options
-rw-r--r-- | dependency-versions/pom.xml | 2 | ||||
-rw-r--r-- | flags/src/main/java/com/yahoo/vespa/flags/Flags.java | 7 | ||||
-rw-r--r-- | parent/pom.xml | 6 | ||||
-rw-r--r-- | searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp | 47 | ||||
-rw-r--r-- | searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp | 1 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp | 5 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_graph.h | 3 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp | 9 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 2 | ||||
-rw-r--r-- | searchsummary/src/tests/juniper/auxTest.cpp | 6 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/matchobject.cpp | 117 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/matchobject.h | 5 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/querynode.cpp | 90 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/querynode.h | 61 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/queryvisitor.cpp | 2 | ||||
-rw-r--r-- | vespalib/src/tests/alloc/alloc_test.cpp | 17 |
16 files changed, 181 insertions, 199 deletions
diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index 18e3e47e241..30a4bdbde79 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -65,7 +65,7 @@ <assertj.vespa.version>3.25.3</assertj.vespa.version> <!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories --> - <aws-sdk.vespa.version>1.12.652</aws-sdk.vespa.version> + <aws-sdk.vespa.version>1.12.653</aws-sdk.vespa.version> <athenz.vespa.version>1.11.51</athenz.vespa.version> <!-- Athenz END --> diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index bd7ed3369eb..a6dd3d46821 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -428,13 +428,6 @@ public class Flags { "Takes effect immediately", TENANT_ID, CONSOLE_USER_EMAIL); - public static final UnboundBooleanFlag RESTART_ON_DEPLOY_WHEN_ONNX_MODEL_CHANGES = defineFeatureFlag( - "restart-on-deploy-when-onnx-model-changes", true, - List.of("hmusum"), "2023-12-04", "2024-03-01", - "If set, restart on deploy if onnx model or onnx model options used by a container cluster change", - "Takes effect at redeployment", - INSTANCE_ID); - /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners, String createdAt, String expiresAt, String description, diff --git a/parent/pom.xml b/parent/pom.xml index c10c74c0cb1..3944bd89dcf 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -317,7 +317,7 @@ --> <groupId>org.openrewrite.maven</groupId> <artifactId>rewrite-maven-plugin</artifactId> - <version>5.21.0</version> + <version>5.22.0</version> <configuration> <activeRecipes> <recipe>org.openrewrite.java.testing.junit5.JUnit5BestPractices</recipe> @@ -327,7 +327,7 @@ <dependency> <groupId>org.openrewrite.recipe</groupId> <artifactId>rewrite-testing-frameworks</artifactId> - <version>2.3.1</version> + <version>2.3.2</version> </dependency> </dependencies> </plugin> @@ -1172,7 +1172,7 @@ See pluginManagement of rewrite-maven-plugin for more details --> <groupId>org.openrewrite.recipe</groupId> <artifactId>rewrite-recipe-bom</artifactId> - <version>2.6.3</version> + <version>2.6.4</version> <type>pom</type> <scope>import</scope> </dependency> diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index da58dd749ba..c01fc33767a 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -273,6 +273,9 @@ public: void reset_doom(vespalib::steady_time::duration time_to_doom) { _doom = std::make_unique<vespalib::FakeDoom>(time_to_doom); } + uint32_t get_active_nodes() const noexcept { + return index->get_active_nodes(); + } static constexpr bool is_single = std::is_same_v<IndexType, HnswIndex<HnswIndexType::SINGLE>>; }; @@ -284,24 +287,29 @@ TYPED_TEST_SUITE(HnswIndexTest, HnswIndexTestTypes); TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_select_neighbors) { this->init(false); + EXPECT_EQ(0, this->get_active_nodes()); this->add_document(1); this->expect_level_0(1, {}); + EXPECT_EQ(1, this->get_active_nodes()); this->add_document(2); this->expect_level_0(1, {2}); this->expect_level_0(2, {1}); + EXPECT_EQ(2, this->get_active_nodes()); this->add_document(3); this->expect_level_0(1, {2, 3}); this->expect_level_0(2, {1, 3}); this->expect_level_0(3, {1, 2}); + EXPECT_EQ(3, this->get_active_nodes()); this->add_document(4); this->expect_level_0(1, {2, 3, 4}); this->expect_level_0(2, {1, 3}); this->expect_level_0(3, {1, 2, 4}); this->expect_level_0(4, {1, 3}); + EXPECT_EQ(4, this->get_active_nodes()); this->add_document(5); this->expect_level_0(1, {2, 3, 4}); @@ -309,6 +317,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec this->expect_level_0(3, {1, 2, 4, 5}); this->expect_level_0(4, {1, 3}); this->expect_level_0(5, {2, 3}); + EXPECT_EQ(5, this->get_active_nodes()); this->add_document(6); this->expect_level_0(1, {2, 3, 4}); @@ -317,6 +326,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec this->expect_level_0(4, {1, 3}); this->expect_level_0(5, {2, 3, 6}); this->expect_level_0(6, {2, 5}); + EXPECT_EQ(6, this->get_active_nodes()); this->add_document(7); this->expect_level_0(1, {2, 3, 4}); @@ -326,6 +336,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec this->expect_level_0(5, {2, 3, 6}); this->expect_level_0(6, {2, 5}); this->expect_level_0(7, {2, 3}); + EXPECT_EQ(7, this->get_active_nodes()); this->expect_top_3(1, {1}); this->expect_top_3(2, {2, 1, 3}); @@ -352,47 +363,57 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_and_removed) { this->init(false); + EXPECT_EQ(0, this->get_active_nodes()); this->add_document(1); this->expect_level_0(1, {}); this->expect_entry_point(1, 0); + EXPECT_EQ(1, this->get_active_nodes()); this->add_document(2); this->expect_level_0(1, {2}); this->expect_level_0(2, {1}); this->expect_entry_point(1, 0); + EXPECT_EQ(2, this->get_active_nodes()); this->add_document(3); this->expect_level_0(1, {2, 3}); this->expect_level_0(2, {1, 3}); this->expect_level_0(3, {1, 2}); this->expect_entry_point(1, 0); + EXPECT_EQ(3, this->get_active_nodes()); this->remove_document(2); this->expect_level_0(1, {3}); this->expect_level_0(3, {1}); this->expect_entry_point(1, 0); + EXPECT_EQ(2, this->get_active_nodes()); this->remove_document(1); this->expect_level_0(3, {}); this->expect_entry_point(3, 0); + EXPECT_EQ(1, this->get_active_nodes()); this->remove_document(3); this->expect_entry_point(0, -1); + EXPECT_EQ(0, this->get_active_nodes()); } TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic_select_neighbors) { this->init(true); + EXPECT_EQ(0, this->get_active_nodes()); this->add_document(1); this->expect_entry_point(1, 0); this->expect_level_0(1, {}); + EXPECT_EQ(1, this->get_active_nodes()); this->add_document(2); this->expect_entry_point(1, 0); this->expect_level_0(1, {2}); this->expect_level_0(2, {1}); + EXPECT_EQ(2, this->get_active_nodes()); // Doc 3 is also added to level 1 this->add_document(3, 1); @@ -402,6 +423,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(1, {2, 3}); this->expect_level_0(2, {1}); this->expect_levels(3, {{1}, {}}); + EXPECT_EQ(3, this->get_active_nodes()); // Doc 4 is closest to 1 and they are linked. // Doc 4 is NOT linked to 3 as the distance between 4 and 3 is greater than the distance between 3 and 1. @@ -412,6 +434,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(2, {1}); this->expect_levels(3, {{1}, {}}); this->expect_level_0(4, {1}); + EXPECT_EQ(4, this->get_active_nodes()); // Doc 5 is closest to 2 and they are linked. // The other docs are reachable via 2, and no other links are created. Same argument as with doc 4 above. @@ -422,6 +445,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_levels(3, {{1}, {}}); this->expect_level_0(4, {1}); this->expect_level_0(5, {2}); + EXPECT_EQ(5, this->get_active_nodes()); // Doc 6 is closest to 5 and they are linked. // Doc 6 is also linked to 2 as the distance between 6 and 2 is less than the distance between 2 and 5. @@ -434,6 +458,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(4, {1}); this->expect_level_0(5, {2, 6}); this->expect_levels(6, {{2, 5}, {3}, {}}); + EXPECT_EQ(6, this->get_active_nodes()); // Doc 7 is closest to 3 and they are linked. // Doc 7 is also linked to 6 as the distance between 7 and 6 is less than the distance between 6 and 3. @@ -447,13 +472,15 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(5, {2, 6}); this->expect_levels(6, {{2, 5, 7}, {3}, {}}); this->expect_level_0(7, {3, 6}); + EXPECT_EQ(7, this->get_active_nodes()); { Slime actualSlime; SlimeInserter inserter(actualSlime); this->index->get_state(inserter); const auto &root = actualSlime.get(); EXPECT_EQ(0, root["memory_usage"]["onHold"].asLong()); - EXPECT_EQ(8, root["nodes"].asLong()); + EXPECT_EQ(8, root["nodeid_limit"].asLong()); + EXPECT_EQ(7, root["nodes"].asLong()); EXPECT_EQ(7, root["valid_nodes"].asLong()); EXPECT_EQ(0, root["level_histogram"][0].asLong()); EXPECT_EQ(5, root["level_histogram"][1].asLong()); @@ -476,13 +503,15 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(5, {2, 6}); this->expect_levels(6, {{2, 5, 7}, {3}, {}}); this->expect_level_0(7, {3, 6}); + EXPECT_EQ(6, this->get_active_nodes()); { Slime actualSlime; SlimeInserter inserter(actualSlime); this->index->get_state(inserter); const auto &root = actualSlime.get(); EXPECT_EQ(0, root["memory_usage"]["onHold"].asLong()); - EXPECT_EQ(8, root["nodes"].asLong()); + EXPECT_EQ(8, root["nodeid_limit"].asLong()); + EXPECT_EQ(6, root["nodes"].asLong()); EXPECT_EQ(6, root["valid_nodes"].asLong()); EXPECT_EQ(0, root["level_histogram"][0].asLong()); EXPECT_EQ(4, root["level_histogram"][1].asLong()); @@ -498,17 +527,21 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic TYPED_TEST(HnswIndexTest, manual_insert) { this->init(false); + EXPECT_EQ(0, this->get_active_nodes()); std::vector<uint32_t> nbl; HnswTestNode empty{nbl}; this->index->set_node(1, empty); + EXPECT_EQ(1, this->get_active_nodes()); this->index->set_node(2, empty); + EXPECT_EQ(2, this->get_active_nodes()); HnswTestNode three{{1,2}}; this->index->set_node(3, three); this->expect_level_0(1, {3}); this->expect_level_0(2, {3}); this->expect_level_0(3, {1,2}); + EXPECT_EQ(3, this->get_active_nodes()); this->expect_entry_point(1, 0); @@ -517,6 +550,7 @@ TYPED_TEST(HnswIndexTest, manual_insert) this->expect_entry_point(4, 1); this->expect_level_0(1, {3,4}); + EXPECT_EQ(4, this->get_active_nodes()); HnswTestNode five{{{1,2}, {4}}}; this->index->set_node(5, five); @@ -526,6 +560,7 @@ TYPED_TEST(HnswIndexTest, manual_insert) this->expect_levels(3, {{1,2}}); this->expect_levels(4, {{1}, {5}}); this->expect_levels(5, {{1,2}, {4}}); + EXPECT_EQ(5, this->get_active_nodes()); } TYPED_TEST(HnswIndexTest, memory_is_reclaimed_when_doing_changes_to_graph) @@ -540,12 +575,14 @@ TYPED_TEST(HnswIndexTest, memory_is_reclaimed_when_doing_changes_to_graph) this->expect_level_0(1, {2,3}); this->expect_level_0(2, {1,3}); this->expect_level_0(3, {1,2}); + EXPECT_EQ(3, this->get_active_nodes()); auto mem_2 = this->memory_usage(); // We should use more memory with larger link arrays and extra document. EXPECT_GT((mem_2.usedBytes() - mem_2.deadBytes()), (mem_1.usedBytes() - mem_1.deadBytes())); EXPECT_EQ(0, mem_2.allocatedBytesOnHold()); this->remove_document(2); + EXPECT_EQ(2, this->get_active_nodes()); size_t nodes_growth = 0; if constexpr (TestFixture::is_single) { this->expect_level_0(1, {3}); @@ -562,6 +599,7 @@ TYPED_TEST(HnswIndexTest, memory_is_reclaimed_when_doing_changes_to_graph) // We end up in the same state as before document 2 was added and effectively use the same amount of memory. EXPECT_EQ((mem_1.usedBytes() - mem_1.deadBytes() + nodes_growth), (mem_3.usedBytes() - mem_3.deadBytes())); EXPECT_EQ(0, mem_3.allocatedBytesOnHold()); + EXPECT_EQ(2, this->get_active_nodes()); } TYPED_TEST(HnswIndexTest, memory_is_put_on_hold_while_read_guard_is_held) @@ -957,16 +995,21 @@ TYPED_TEST(TwoPhaseTest, two_phase_add) this->expect_levels(4, {{1,2}, {5,6}}); this->expect_levels(5, {{2,3}, {4,6}}); this->expect_levels(6, {{1,3}, {4,5}, {}}); + EXPECT_EQ(6, this->get_active_nodes()); auto up = this->prepare_add(7, 1); // simulate things happening while 7 is in progress: this->add_document(8); // added + EXPECT_EQ(7, this->get_active_nodes()); this->remove_document(1); // removed this->remove_document(5); + EXPECT_EQ(5, this->get_active_nodes()); this->vectors.set(5, {8, 14}); // updated and moved this->add_document(5, 2); this->add_document(9, 1); // added + EXPECT_EQ(7, this->get_active_nodes()); this->complete_add(7, std::move(up)); + EXPECT_EQ(8, this->get_active_nodes()); auto& id_mapping = this->index->get_id_mapping(); auto nodeids = id_mapping.get_ids(7); diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp index b64d87d24ea..efaa8ca0171 100644 --- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp @@ -167,6 +167,7 @@ public: void expect_copy_as_populated() const { EXPECT_EQ(copy.size(), 7); + EXPECT_EQ(4, copy.get_active_nodes()); auto entry = copy.get_entry_node(); EXPECT_EQ(entry.nodeid, 2); EXPECT_EQ(entry.level, 1); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index 56459d47dab..16e2af0ad97 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -11,6 +11,7 @@ template <HnswIndexType type> HnswGraph<type>::HnswGraph() : nodes(), nodes_size(1u), + active_nodes(0u), levels_store(HnswIndex<type>::make_default_level_array_store_config(), {}), links_store(HnswIndex<type>::make_default_link_array_store_config(), {}), entry_nodeid_and_level() @@ -40,6 +41,9 @@ HnswGraph<type>::make_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, u if (nodeid >= nodes_size.load(std::memory_order_relaxed)) { nodes_size.store(nodeid + 1, std::memory_order_release); } + if (levels_ref.valid()) { + set_active_nodes(get_active_nodes() + 1); + } return levels_ref; } @@ -58,6 +62,7 @@ HnswGraph<type>::remove_node(uint32_t nodeid) auto old_links_ref = levels[i].load_relaxed(); links_store.remove(old_links_ref); } + set_active_nodes(get_active_nodes() - 1); if (nodeid + 1 == nodes_size.load(std::memory_order_relaxed)) { trim_nodes_size(); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index 52d227bead1..a1a9e9632be 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -47,6 +47,7 @@ struct HnswGraph { NodeVector nodes; std::atomic<uint32_t> nodes_size; + std::atomic<uint32_t> active_nodes; LevelArrayStore levels_store; LinkArrayStore links_store; @@ -169,6 +170,8 @@ struct HnswGraph { } size_t size() const { return nodes_size.load(std::memory_order_acquire); } + uint32_t get_active_nodes() const noexcept { return active_nodes.load(std::memory_order_relaxed); } + void set_active_nodes(uint32_t value) noexcept { active_nodes.store(value, std::memory_order_relaxed); } struct Histograms { std::vector<uint32_t> level_histogram; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index f0dcbe30317..bf62fcdaa23 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -609,8 +609,8 @@ HnswIndex<type>::prepare_add_document(uint32_t docid, VectorBundle vectors, vespalib::GenerationHandler::Guard read_guard) const { - uint32_t max_nodes = _graph.nodes_size.load(std::memory_order_acquire); - if (max_nodes < _cfg.min_size_before_two_phase()) { + uint32_t active_nodes = _graph.get_active_nodes(); + if (active_nodes < _cfg.min_size_before_two_phase()) { // the first documents added will do all work in write thread // to ensure they are linked together: return std::make_unique<PreparedFirstAddDoc>(); @@ -628,7 +628,7 @@ HnswIndex<type>::complete_add_document(uint32_t docid, std::unique_ptr<PrepareRe internal_complete_add(docid, *prepared); } else { // we expect this for the first documents added, so no warning for them - if (_graph.nodes.size() > 1.25 * _cfg.min_size_before_two_phase()) { + if (_graph.get_active_nodes() > 1.25 * _cfg.min_size_before_two_phase()) { LOG(warning, "complete_add_document(%u) called with invalid prepare_result %s/%u", docid, (prepared ? "valid ptr" : "nullptr"), (prepared ? prepared->docid : 0u)); } @@ -824,7 +824,8 @@ HnswIndex<type>::get_state(const vespalib::slime::Inserter& inserter) const StateExplorerUtils::memory_usage_to_slime(_graph.nodes.getMemoryUsage(), memUsageObj.setObject("nodes")); StateExplorerUtils::memory_usage_to_slime(_graph.levels_store.getMemoryUsage(), memUsageObj.setObject("levels")); StateExplorerUtils::memory_usage_to_slime(_graph.links_store.getMemoryUsage(), memUsageObj.setObject("links")); - object.setLong("nodes", _graph.size()); + object.setLong("nodeid_limit", _graph.size()); + object.setLong("nodes", _graph.get_active_nodes()); auto& histogram_array = object.setArray("level_histogram"); auto& links_hst_array = object.setArray("level_0_links_histogram"); auto histograms = _graph.histograms(); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index eeaa30f55df..d94b6584c35 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -240,6 +240,8 @@ public: uint32_t get_entry_nodeid() const { return _graph.get_entry_node().nodeid; } int32_t get_entry_level() const { return _graph.get_entry_node().level; } + uint32_t get_active_nodes() const noexcept { return _graph.get_active_nodes(); } + // Should only be used by unit tests. HnswTestNode get_node(uint32_t nodeid) const; void set_node(uint32_t nodeid, const HnswTestNode &node); diff --git a/searchsummary/src/tests/juniper/auxTest.cpp b/searchsummary/src/tests/juniper/auxTest.cpp index b5c3bb91d05..439296037d3 100644 --- a/searchsummary/src/tests/juniper/auxTest.cpp +++ b/searchsummary/src/tests/juniper/auxTest.cpp @@ -594,7 +594,7 @@ void AuxTest::TestJuniperStack() // Stack simplification tests QueryExpr* q = new QueryNode(1, 0, 0); QueryExpr* q1 = new QueryNode(1, 0, 0); - QueryExpr* q2 = new QueryTerm("Hepp", 4, 0); + QueryExpr* q2 = new QueryTerm("Hepp", 0, 100); q->AddChild(q1); q1->AddChild(q2); @@ -653,7 +653,7 @@ struct QB { QB(size_t numTerms) : q(new QueryNode(numTerms, 0, 0)) {} QB(QB & rhs) : q(std::move(rhs.q)) { } QB & add(const char * t, bool st = true) { - QueryTerm * qt = new QueryTerm(t, strlen(t), 0); + QueryTerm * qt = new QueryTerm(t, 0, 100); if (st) qt->_options |= X_SPECIALTOKEN; q->AddChild(qt); return *this; @@ -671,7 +671,7 @@ struct Ctx { }; Ctx::Ctx(const std::string & text_, QB & qb_) : text(text_), qb(qb_), str(qb.q.get()), wf(), tp(text), jt(&wf, text.c_str(), text.size(), &tp, &str) { jt.scan(); } -Ctx::~Ctx() { } +Ctx::~Ctx() = default; void AuxTest::TestSpecialTokenRegistry() diff --git a/searchsummary/src/vespa/juniper/matchobject.cpp b/searchsummary/src/vespa/juniper/matchobject.cpp index 7bd0bead5cd..78f508f3ed0 100644 --- a/searchsummary/src/vespa/juniper/matchobject.cpp +++ b/searchsummary/src/vespa/juniper/matchobject.cpp @@ -1,8 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "query.h" #include "matchobject.h" -#include "juniperdebug.h" #include "juniper_separators.h" #include "result.h" #include "charutil.h" @@ -16,7 +14,7 @@ using namespace juniper::separators; class traverser : public IQueryExprVisitor { public: - traverser(MatchObject& mo) : _mo(mo) {} + explicit traverser(MatchObject& mo) noexcept : _mo(mo) {} void VisitQueryNode(QueryNode*) override { // We must not add this node to nonterminals before all children has been added! @@ -24,13 +22,11 @@ public: // until no more candidates... } - void RevisitQueryNode(QueryNode* n) override - { + void RevisitQueryNode(QueryNode* n) override { _mo.add_nonterm(n); } - void VisitQueryTerm(QueryTerm* t) override - { + void VisitQueryTerm(QueryTerm* t) override { if (t->rewriter && t->rewriter->ForDocument()) _mo.add_reduction_term(t, t->rewriter); else @@ -44,21 +40,18 @@ private: class query_expander : public IQueryExprVisitor { public: - query_expander(MatchObject& mo, uint32_t langid) + query_expander(MatchObject& mo, uint32_t langid) noexcept : _caller(), _mo(mo), _langid(langid) {} - void VisitQueryTerm(QueryTerm* orig) override - { - const char* nt = NULL; + void VisitQueryTerm(QueryTerm* orig) override { + const char* nt = nullptr; size_t length; - juniper::RewriteHandle* te = NULL; + juniper::RewriteHandle* te = nullptr; bool reduction = false; - if (orig->rewriter) - { + if (orig->rewriter) { // Check if expansions are necessary - if (orig->rewriter->ForQuery()) - { + if (orig->rewriter->ForQuery()) { te = orig->rewriter->Rewrite(_langid, orig->term()); if (te) nt = orig->rewriter->NextTerm(te, length); @@ -69,8 +62,7 @@ public: // to a separate mapping reduction = orig->rewriter->ForDocument(); } - if (nt == NULL) - { + if (nt == nullptr) { QueryTerm* t = new QueryTerm(orig); // No matches found, just clone term.. if (!reduction) _mo.add_queryterm(t); @@ -81,9 +73,8 @@ public: } // Start expanding... std::vector<QueryTerm*> newterms; - while (nt != NULL) - { - QueryTerm* nqt = new QueryTerm(nt, length, -1); + while (nt != nullptr) { + QueryTerm* nqt = new QueryTerm(vespalib::stringref(nt, length), -1, 100); // Copy options but do not apply juniper stem match for expanded terms nqt->_options = orig->_options | X_EXACT; if (!reduction) @@ -93,8 +84,7 @@ public: newterms.push_back(nqt); nt = orig->rewriter->NextTerm(te, length); } - if (newterms.size() == 1) - { + if (newterms.size() == 1) { update(newterms.front()); return; } @@ -102,10 +92,8 @@ public: QueryNode* qn = new QueryNode(newterms.size(), orig->_weight, orig->_weight); // preserve options for nodes too, but make the node an OR.. qn->_options = orig->_options | X_OR; - for (std::vector<QueryTerm*>::iterator it = newterms.begin(); - it != newterms.end(); ++it) - { - qn->AddChild(*it); + for (QueryTerm * newTerm : newterms) { + qn->AddChild(newTerm); } update(qn); _mo.add_nonterm(qn); @@ -128,12 +116,11 @@ public: } QueryExpr* NewQuery() { - if (_caller.empty()) return NULL; + if (_caller.empty()) return nullptr; return _caller.top(); } private: - void update(QueryExpr* e) - { + void update(QueryExpr* e) { if (!_caller.empty()) _caller.top()->AddChild(e); } @@ -161,7 +148,7 @@ MatchObject::MatchObject(QueryExpr* query, bool has_reductions) : MatchObject::MatchObject(QueryExpr* query, bool has_reductions, uint32_t langid) : - _query(NULL), + _query(nullptr), _qt(), _nonterms(), _match_overlap(false), @@ -178,8 +165,7 @@ MatchObject::MatchObject(QueryExpr* query, bool has_reductions, uint32_t langid if (LOG_WOULD_LOG(debug)) { std::string s; _query->Dump(s); - LOG(debug, "juniper::MatchObject(language id %d): modified stack: %s", - langid, s.c_str()); + LOG(debug, "juniper::MatchObject(language id %d): modified stack: %s", langid, s.c_str()); } _max_arity = _query->MaxArity(); } @@ -199,8 +185,7 @@ bool MatchObject::Match(MatchObject::iterator& mi, Token& token, unsigned& optio if (!q) return false; options = 0; q->total_match_cnt++; - if (q->ucs4_len == static_cast<size_t>(token.curlen)) - { + if (q->ucs4_len == static_cast<size_t>(token.curlen)) { options |= X_EXACT; q->exact_match_cnt++; } @@ -221,8 +206,7 @@ void MatchObject::add_queryterm(QueryTerm* nt) _qt.push_back(nt); nt->idx = _qt.size() - 1; - _qt_byname.Insert( - *(reinterpret_cast<const queryterm_hashtable::keytype*>(nt->ucs4_term())), nt); + _qt_byname.Insert(*(reinterpret_cast<const queryterm_hashtable::keytype*>(nt->ucs4_term())), nt); LOG(debug, "MatchObject: adding term '%s'", nt->term()); } @@ -242,25 +226,24 @@ void MatchObject::add_reduction_term(QueryTerm* nt, juniper::Rewriter* rw) match_iterator::match_iterator(MatchObject* mo, Result* rhandle) : - _table(mo->_qt_byname), _el(NULL), _rhandle(rhandle), - _reductions(mo->HasReductions()), _reduce_matches(NULL), _reduce_matches_it(), + _table(mo->_qt_byname), _el(nullptr), _rhandle(rhandle), + _reductions(mo->HasReductions()), _reduce_matches(nullptr), _reduce_matches_it(), _mo(mo), _len(0), _stem_min(rhandle->StemMin()), _stemext(rhandle->StemExt()), - _term(NULL) + _term(nullptr) {} -QueryTerm* match_iterator::first() +QueryTerm* +match_iterator::first() { - for (; _el != NULL; _el = _el->GetNext()) - { + for (; _el != nullptr; _el = _el->GetNext()) { QueryTerm* q = _el->GetItem(); // If exact match is desired by this subexpression, // only have effect if exact match - if (q->Exact() && _len > q->len) continue; + if (q->Exact() && _len > q->len()) continue; - if (q->is_wildcard()) - { + if (q->is_wildcard()) { if (fast::util::wildcard_match(_term, q->ucs4_term()) == false) continue; return q; } @@ -268,30 +251,28 @@ QueryTerm* match_iterator::first() if (_len < q->ucs4_len) continue; // allow prefix match iff prefix query term or // rest < _stem_extend and length > stem_min - if (!q->is_prefix()) - { + if (!q->is_prefix()) { size_t stem_extend = (q->ucs4_len <= _stem_min ? 0 : _stemext); if (_len > q->ucs4_len + stem_extend) continue; } if (juniper::strncmp(_term, q->ucs4_term(), q->ucs4_len) != 0) continue; return q; } - return NULL; + return nullptr; } QueryTerm* match_iterator::next_reduce_match() { - if (!_reduce_matches) return NULL; - if (_reduce_matches_it != _reduce_matches->end()) - { + if (!_reduce_matches) return nullptr; + if (_reduce_matches_it != _reduce_matches->end()) { QueryTerm* t = *_reduce_matches_it; ++_reduce_matches_it; return t; } delete _reduce_matches; - _reduce_matches = NULL; - return NULL; + _reduce_matches = nullptr; + return nullptr; } @@ -321,7 +302,7 @@ QueryTerm* match_iterator::first_match(Token& token) token.curlen = term - token.token; LOG(debug, "recurse A to match token %u..%u len %d", token.token[0], token.token[token.curlen-1], token.curlen); qt = this->first_match(token); - if (qt != NULL) { + if (qt != nullptr) { return qt; } token.token = ++term; // skip SPACE @@ -348,7 +329,7 @@ QueryTerm* match_iterator::first_match(Token& token) queryterm_hashtable::keytype keyval = termval; if (LOG_WOULD_LOG(spam)) { char utf8term[1024]; - Fast_UnicodeUtil::utf8ncopy(utf8term, term, 1024, (term != NULL ? len : 0)); + Fast_UnicodeUtil::utf8ncopy(utf8term, term, 1024, (term != nullptr ? len : 0)); LOG(spam, "term %s, len %ld, keyval 0x%x termval 0x%x", utf8term, len, keyval, termval); } @@ -356,22 +337,18 @@ QueryTerm* match_iterator::first_match(Token& token) _len = len; QueryTerm* rtrn = first(); - if (rtrn == 0) - { + if (rtrn == 0) { _el = _table.FindRef('*'); - if ((rtrn = first()) == 0) - { + if ((rtrn = first()) == 0) { _el = _table.FindRef('?'); rtrn = first(); } } - if (_reductions) - { + if (_reductions) { _reduce_matches = _mo->_reduce_matchers.match(_rhandle->_langid, &_rhandle->_docsum[token.bytepos], token.bytelen); - if (_reduce_matches) - { + if (_reduce_matches) { _reduce_matches_it = _reduce_matches->begin(); // Find the first reduce match only if no other match was found @@ -388,26 +365,24 @@ QueryTerm* match_iterator::first_match(Token& token) QueryTerm* match_iterator::current() { if (_el) return _el->GetItem(); - if (!_reduce_matches) return NULL; - if (_reduce_matches_it != _reduce_matches->end()) - { + if (!_reduce_matches) return nullptr; + if (_reduce_matches_it != _reduce_matches->end()) { QueryTerm* t = *_reduce_matches_it; return t; } delete _reduce_matches; - return NULL; + return nullptr; } QueryTerm* match_iterator::next() { - if (_el) - { + if (_el) { _el = _el->GetNext(); return first(); } else if (_reduce_matches) return next_reduce_match(); - return NULL; + return nullptr; } diff --git a/searchsummary/src/vespa/juniper/matchobject.h b/searchsummary/src/vespa/juniper/matchobject.h index d31e071d688..8289d6275ed 100644 --- a/searchsummary/src/vespa/juniper/matchobject.h +++ b/searchsummary/src/vespa/juniper/matchobject.h @@ -17,9 +17,8 @@ using Token = ITokenProcessor::Token; // struct QueryTermLengthComparator { - inline bool operator()(QueryTerm* m1, QueryTerm* m2) - { - return m1->len <= m2->len; + bool operator()(const QueryTerm* m1, const QueryTerm* m2) { + return m1->len() <= m2->len(); } }; diff --git a/searchsummary/src/vespa/juniper/querynode.cpp b/searchsummary/src/vespa/juniper/querynode.cpp index a52f8d07c80..b805c12e86f 100644 --- a/searchsummary/src/vespa/juniper/querynode.cpp +++ b/searchsummary/src/vespa/juniper/querynode.cpp @@ -12,65 +12,67 @@ LOG_SETUP(".juniper.querynode"); * in Matcher.h */ -QueryExpr::QueryExpr(int weight, int arity) : - _options(0), _weight(weight), _arity(arity), _parent(nullptr), _childno(0) +QueryExpr::QueryExpr(int weight, int arity) + : _parent(nullptr), + _options(0), + _weight(weight), + _arity(arity), + _childno(0) { } -QueryExpr::QueryExpr(QueryExpr* e) : - _options(e->_options), - _weight(e->_weight), - _arity(e->_arity), - _parent(nullptr), - _childno(0) +QueryExpr::QueryExpr(QueryExpr* e) + : _parent(nullptr), + _options(e->_options), + _weight(e->_weight), + _arity(e->_arity), + _childno(0) { } QueryExpr::~QueryExpr() = default; -QueryTerm::QueryTerm(const char* t, int length, int ix, int wgt) - : QueryExpr(wgt, 0), len(length), +QueryTerm::QueryTerm(vespalib::stringref term, int ix, int wgt) + : QueryExpr(wgt, 0), ucs4_len(0), - total_match_cnt(0), exact_match_cnt(0), - idx(ix), rewriter(nullptr), reduce_matcher(nullptr), _rep(nullptr), - _ucs4_term(nullptr) + total_match_cnt(0), + exact_match_cnt(0), + idx(ix), + rewriter(nullptr), + reduce_matcher(nullptr), + _term(term), + _ucs4_term(new ucs4_t[_term.size()+1]) { - if (len <= 0) - len = strlen(t); - _rep = new char[len+1]; - strncpy(_rep, t, len); _rep[len] = '\0'; - _ucs4_term = new ucs4_t[len+1]; - Fast_UnicodeUtil::ucs4copy(_ucs4_term, _rep); + Fast_UnicodeUtil::ucs4copy(_ucs4_term, _term.c_str()); ucs4_len = Fast_UnicodeUtil::ucs4strlen(_ucs4_term); } QueryTerm::QueryTerm(QueryTerm* t) - : QueryExpr(t), len(t->len), + : QueryExpr(t), ucs4_len(0), total_match_cnt(0), exact_match_cnt(0), - idx(-1), rewriter(nullptr), reduce_matcher(nullptr), _rep(nullptr), - _ucs4_term(nullptr) + idx(-1), rewriter(nullptr), reduce_matcher(nullptr), + _term(t->_term), + _ucs4_term(new ucs4_t[_term.size()+1]) { - _rep = new char[len+1]; - strncpy(_rep, t->term(), len); _rep[len] = '\0'; - _ucs4_term = new ucs4_t[len+1]; - Fast_UnicodeUtil::ucs4copy(_ucs4_term, _rep); + Fast_UnicodeUtil::ucs4copy(_ucs4_term, _term.c_str()); ucs4_len = Fast_UnicodeUtil::ucs4strlen(_ucs4_term); } -QueryTerm::~QueryTerm() -{ - delete[] _rep; +QueryTerm::~QueryTerm() { delete[] _ucs4_term; } -QueryNode::QueryNode(int arity, int threshold, int weight) : - QueryExpr(weight, arity), _threshold(threshold), _limit(0), - _children(nullptr), - _nchild(0), _node_idx(-1) +QueryNode::QueryNode(int arity, int threshold, int weight) + : QueryExpr(weight, arity), + _children(nullptr), + _threshold(threshold), + _limit(0), + _nchild(0), + _node_idx(-1) { assert(arity > 0); _children = new QueryExpr*[arity]; @@ -79,9 +81,9 @@ QueryNode::QueryNode(int arity, int threshold, int weight) : QueryNode::QueryNode(QueryNode* n) : QueryExpr(n), + _children(nullptr), _threshold(n->_threshold), _limit(n->_limit), - _children(nullptr), _nchild(0), _node_idx(n->_node_idx) { @@ -115,12 +117,10 @@ QueryTerm::AddChild(QueryExpr*) QueryNode* -QueryNode::AddChild(QueryExpr* child) -{ - if (!child) +QueryNode::AddChild(QueryExpr* child) { + if (!child) { _arity--; - else - { + } else { child->_parent = this; child->_childno = _nchild; _children[_nchild++] = child; @@ -223,18 +223,6 @@ QueryNode::MaxArity() return max_arity; } - -bool -QueryNode::AcceptsInitially(QueryExpr* n) -{ - assert(n->_parent == this); -// return (!(_options & X_ORDERED)) || n->_childno == 0; - // currently implicitly add all terms even for ordered.. - (void) n; - return true; -} - - /** Modify the given stack by eliminating unnecessary internal nodes * with arity 1 or non-terms with arity 0 */ diff --git a/searchsummary/src/vespa/juniper/querynode.h b/searchsummary/src/vespa/juniper/querynode.h index cf8f7eb119f..b80831c302d 100644 --- a/searchsummary/src/vespa/juniper/querynode.h +++ b/searchsummary/src/vespa/juniper/querynode.h @@ -41,7 +41,7 @@ using querynode_vector = std::vector<QueryNode*>; class IQueryExprVisitor { public: - virtual ~IQueryExprVisitor() {} + virtual ~IQueryExprVisitor() = default; // Visit before visiting subnodes virtual void VisitQueryNode(QueryNode*) = 0; @@ -56,7 +56,9 @@ public: class QueryExpr { public: - explicit QueryExpr(int weight, int arity); + QueryExpr(const QueryExpr &) = delete; + QueryExpr &operator=(const QueryExpr &) = delete; + QueryExpr(int weight, int arity); explicit QueryExpr(QueryExpr* e); /** Add a child to the end of the list of children for this node. @@ -79,20 +81,14 @@ public: virtual int MaxArity() { return 0; } - inline bool HasConstraints() { return _options & X_CONSTR; } - inline bool UsesValid() { return _options & X_CHKVAL; } - inline bool HasLimit() { return _options & X_LIMIT; } - inline bool Exact() { return _options & X_EXACT; } + bool HasLimit() const noexcept { return _options & X_LIMIT; } + bool Exact() const noexcept { return _options & X_EXACT; } - int _options; // Applied options (bitmap) for this node - int _weight; // Weight of this term by parent - if 0: weight is sum of children - int _arity; // Arity of this query subexpression (may get decremented..) - QueryNode* _parent; // Pointer to parent or NULL if this is the root of the query - int _childno; // Position number within parent's children (0 if no parents) - -private: - QueryExpr(QueryExpr &); - QueryExpr &operator=(QueryExpr &); + QueryNode* _parent; // Pointer to parent or NULL if this is the root of the query + int _options; // Applied options (bitmap) for this node + int _weight; // Weight of this term by parent - if 0: weight is sum of children + int _arity; // Arity of this query subexpression (may get decremented..) + int _childno; // Position number within parent's children (0 if no parents) }; @@ -123,16 +119,11 @@ public: void Accept(IQueryExprVisitor& v) override; - // return true if a match for n should lead to creation of a new candidate node - // corresponding to this query tree node: - bool AcceptsInitially(QueryExpr* n); - - int _threshold; // Threshold for this expression node to be considered complete - int _limit; // NEAR/WITHIN limit if X_LIMIT option set - /* Pointer to an array of length _arity of pointers to * subqueries associated with this query */ QueryExpr** _children; + int _threshold; // Threshold for this expression node to be considered complete + int _limit; // NEAR/WITHIN limit if X_LIMIT option set int _nchild; // end pointer (fill level) of _children int _node_idx; // Index (position) of this nonterminal within table of all nonterminals }; @@ -143,9 +134,11 @@ public: class QueryTerm : public QueryExpr { public: - QueryTerm(const char* t, int length, int ix, int weight = 100); - explicit QueryTerm(QueryTerm* const); - ~QueryTerm(); + QueryTerm(const QueryTerm &) = delete; + QueryTerm &operator=(const QueryTerm &) = delete; + QueryTerm(vespalib::stringref, int ix, int weight); + explicit QueryTerm(QueryTerm*); + ~QueryTerm() override; int Limit() override; QueryNode* AddChild(QueryExpr* child) override; void Dump(std::string&) override; @@ -155,13 +148,12 @@ public: bool Complex() override { return false; } void Accept(IQueryExprVisitor& v) override; - inline const char* term() { return _rep; } - inline const ucs4_t* ucs4_term() { return _ucs4_term; } - inline bool is_prefix() { return _options & X_PREFIX; } - inline bool is_wildcard() { return _options & X_WILD; } - inline bool isSpecialToken() { return _options & X_SPECIALTOKEN; } - - size_t len; + const char* term() const noexcept { return _term.c_str(); } + const ucs4_t* ucs4_term() const noexcept { return _ucs4_term; } + bool is_prefix() const noexcept { return _options & X_PREFIX; } + bool is_wildcard() const noexcept { return _options & X_WILD; } + bool isSpecialToken() const noexcept { return _options & X_SPECIALTOKEN; } + size_t len() const noexcept { return _term.size(); } size_t ucs4_len; int total_match_cnt; int exact_match_cnt; @@ -169,11 +161,8 @@ public: juniper::Rewriter* rewriter; juniper::string_matcher* reduce_matcher; private: - char* _rep; + vespalib::string _term; ucs4_t* _ucs4_term; - - QueryTerm(QueryTerm &); - QueryTerm &operator=(QueryTerm &); }; diff --git a/searchsummary/src/vespa/juniper/queryvisitor.cpp b/searchsummary/src/vespa/juniper/queryvisitor.cpp index 6192cb985b9..6a4631bc3ae 100644 --- a/searchsummary/src/vespa/juniper/queryvisitor.cpp +++ b/searchsummary/src/vespa/juniper/queryvisitor.cpp @@ -251,7 +251,7 @@ QueryVisitor::visitKeyword(const QueryItem* item, vespalib::stringref keyword, b ind.c_str(), (!ind.empty() ? ":" : ""), s.c_str()); } - auto * term = new QueryTerm(keyword.data(), keyword.size(), _term_index++, item->get_weight()); + auto * term = new QueryTerm(keyword, _term_index++, item->get_weight()); if (prefix) { bool is_wild = std::any_of(keyword.begin(), keyword.end(), [](char c) {return (c == '*') || (c == '?'); }); term->_options |= (is_wild ? X_WILD : X_PREFIX); diff --git a/vespalib/src/tests/alloc/alloc_test.cpp b/vespalib/src/tests/alloc/alloc_test.cpp index b50c6d782e2..1436724267d 100644 --- a/vespalib/src/tests/alloc/alloc_test.cpp +++ b/vespalib/src/tests/alloc/alloc_test.cpp @@ -198,23 +198,6 @@ void verifyNoExtensionWhenNoRoom(Alloc & buf, Alloc & reserved, size_t sz) { } #ifdef __linux__ -TEST("auto alloced mmap alloc can be extended if room") { - static constexpr size_t SZ = MemoryAllocator::HUGEPAGE_SIZE*2; - Alloc reserved = Alloc::alloc(SZ); - Alloc buf = Alloc::alloc(SZ); - - TEST_DO(ensureRoomForExtension(buf, reserved)); - TEST_DO(verifyExtension(buf, SZ, (SZ/2)*3)); -} - -TEST("auto alloced mmap alloc can not be extended if no room") { - static constexpr size_t SZ = MemoryAllocator::HUGEPAGE_SIZE*2; - Alloc reserved = Alloc::alloc(SZ); - Alloc buf = Alloc::alloc(SZ); - - TEST_DO(verifyNoExtensionWhenNoRoom(buf, reserved, SZ)); -} - /* * The two following tests are disabled when any sanitizer is * enabled since extra instrumentation code might trigger extra mmap |