diff options
author | Arnstein Ressem <aressem@vespa.ai> | 2024-06-03 09:18:19 +0200 |
---|---|---|
committer | Arnstein Ressem <aressem@vespa.ai> | 2024-06-03 09:18:19 +0200 |
commit | 6e455c397bb6966049ed5f8691e60c87d40c51bb (patch) | |
tree | 3b83b5ae53accab06de07d858eeb64712336c802 | |
parent | 38ae0c486f951b78d1c7b128c8189fe8e5684a0c (diff) | |
parent | 8dc3cc1a3f2af0158a3b85098433dfd10129ede0 (diff) |
Merge branch 'master' into aressem/test-valgrind
11 files changed, 340 insertions, 60 deletions
diff --git a/client/js/app/yarn.lock b/client/js/app/yarn.lock index c0a1c8be5d3..72d9d848d27 100644 --- a/client/js/app/yarn.lock +++ b/client/js/app/yarn.lock @@ -830,6 +830,15 @@ resolved "https://registry.yarnpkg.com/@eslint-community/regexpp/-/regexpp-4.10.0.tgz#548f6de556857c8bb73bbee70c35dc82a2e74d63" integrity sha512-Cu96Sd2By9mCNTx2iyKOmq10v22jUVQv0lQnlGNy16oE9589yE+QADPbrMGCkA51cKZSg3Pu/aTJVTGfL/qjUA== +"@eslint/config-array@^0.15.1": + version "0.15.1" + resolved "https://registry.yarnpkg.com/@eslint/config-array/-/config-array-0.15.1.tgz#1fa78b422d98f4e7979f2211a1fde137e26c7d61" + integrity sha512-K4gzNq+yymn/EVsXYmf+SBcBro8MTf+aXJZUphM96CdzUEr+ClGDvAbpmaEK+cGVigVXIgs9gNmvHAlrzzY5JQ== + dependencies: + "@eslint/object-schema" "^2.1.3" + debug "^4.3.1" + minimatch "^3.0.5" + "@eslint/eslintrc@^3.1.0": version "3.1.0" resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-3.1.0.tgz#dbd3482bfd91efa663cbe7aa1f506839868207b6" @@ -845,10 +854,15 @@ minimatch "^3.1.2" strip-json-comments "^3.1.1" -"@eslint/js@9.3.0": - version "9.3.0" - resolved "https://registry.yarnpkg.com/@eslint/js/-/js-9.3.0.tgz#2e8f65c9c55227abc4845b1513c69c32c679d8fe" - integrity sha512-niBqk8iwv96+yuTwjM6bWg8ovzAPF9qkICsGtcoa5/dmqcEMfdwNAX7+/OHcJHc7wj7XqPxH98oAHytFYlw6Sw== +"@eslint/js@9.4.0": + version "9.4.0" + resolved "https://registry.yarnpkg.com/@eslint/js/-/js-9.4.0.tgz#96a2edd37ec0551ce5f9540705be23951c008a0c" + integrity sha512-fdI7VJjP3Rvc70lC4xkFXHB0fiPeojiL1PxVG6t1ZvXQrarj893PweuBTujxDUFk0Fxj4R7PIIAZ/aiiyZPZcg== + +"@eslint/object-schema@^2.1.3": + version "2.1.3" + resolved "https://registry.yarnpkg.com/@eslint/object-schema/-/object-schema-2.1.3.tgz#e65ae80ee2927b4fd8c5c26b15ecacc2b2a6cc2a" + integrity sha512-HAbhAYKfsAC2EkTqve00ibWIZlaU74Z1EHwAjYr4PXF0YU2VEA1zSIKSSpKszRLRWwHzzRZXvK632u+uXzvsvw== "@floating-ui/core@^1.4.2": version "1.5.0" @@ -919,25 +933,11 @@ dependencies: prop-types "^15.8.1" -"@humanwhocodes/config-array@^0.13.0": - version "0.13.0" - resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.13.0.tgz#fb907624df3256d04b9aa2df50d7aa97ec648748" - integrity sha512-DZLEEqFWQFiyK6h5YIeynKx7JlvCYWL0cImfSRXZ9l4Sg2efkFGTuFf6vzXjK1cq6IYkU+Eg/JizXw+TD2vRNw== - dependencies: - "@humanwhocodes/object-schema" "^2.0.3" - debug "^4.3.1" - minimatch "^3.0.5" - "@humanwhocodes/module-importer@^1.0.1": version "1.0.1" resolved "https://registry.yarnpkg.com/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz#af5b2691a22b44be847b0ca81641c5fb6ad0172c" integrity sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA== -"@humanwhocodes/object-schema@^2.0.3": - version "2.0.3" - resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz#4a2868d75d6d6963e423bcf90b7fd1be343409d3" - integrity sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA== - "@humanwhocodes/retry@^0.3.0": version "0.3.0" resolved "https://registry.yarnpkg.com/@humanwhocodes/retry/-/retry-0.3.0.tgz#6d86b8cb322660f03d3f0aa94b99bdd8e172d570" @@ -2324,13 +2324,20 @@ debug@^3.2.7: dependencies: ms "^2.1.1" -debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2: +debug@^4.1.0, debug@^4.1.1: version "4.3.4" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== dependencies: ms "2.1.2" +debug@^4.3.1, debug@^4.3.2: + version "4.3.5" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.5.tgz#e83444eceb9fedd4a1da56d671ae2446a01a6e1e" + integrity sha512-pt0bNEmneDIvdL1Xsd9oDQ/wrQRkXDT4AUWlNZNPKvW5x/jyO9VFXkJUP07vQ2upmw5PlaITaPKc31jK13V+jg== + dependencies: + ms "2.1.2" + decode-uri-component@^0.2.0: version "0.2.2" resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.2.tgz#e69dbe25d37941171dd540e024c444cd5188e1e9" @@ -2736,15 +2743,15 @@ eslint-visitor-keys@^4.0.0: integrity sha512-OtIRv/2GyiF6o/d8K7MYKKbXrOUBIK6SfkIRM4Z0dY3w+LiQ0vy3F57m0Z71bjbyeiWFiHJ8brqnmE6H6/jEuw== eslint@^9.0.0: - version "9.3.0" - resolved "https://registry.yarnpkg.com/eslint/-/eslint-9.3.0.tgz#36a96db84592618d6ed9074d677e92f4e58c08b9" - integrity sha512-5Iv4CsZW030lpUqHBapdPo3MJetAPtejVW8B84GIcIIv8+ohFaddXsrn1Gn8uD9ijDb+kcYKFUVmC8qG8B2ORQ== + version "9.4.0" + resolved "https://registry.yarnpkg.com/eslint/-/eslint-9.4.0.tgz#79150c3610ae606eb131f1d648d5f43b3d45f3cd" + integrity sha512-sjc7Y8cUD1IlwYcTS9qPSvGjAC8Ne9LctpxKKu3x/1IC9bnOg98Zy6GxEJUfr1NojMgVPlyANXYns8oE2c1TAA== dependencies: "@eslint-community/eslint-utils" "^4.2.0" "@eslint-community/regexpp" "^4.6.1" + "@eslint/config-array" "^0.15.1" "@eslint/eslintrc" "^3.1.0" - "@eslint/js" "9.3.0" - "@humanwhocodes/config-array" "^0.13.0" + "@eslint/js" "9.4.0" "@humanwhocodes/module-importer" "^1.0.1" "@humanwhocodes/retry" "^0.3.0" "@nodelib/fs.walk" "^1.2.8" @@ -4721,9 +4728,9 @@ prettier-linter-helpers@^1.0.0: fast-diff "^1.1.2" prettier@3: - version "3.2.5" - resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.2.5.tgz#e52bc3090586e824964a8813b09aba6233b28368" - integrity sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A== + version "3.3.0" + resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.3.0.tgz#d173ea0524a691d4c0b1181752f2b46724328cdf" + integrity sha512-J9odKxERhCQ10OC2yb93583f6UnYutOeiV5i0zEDS7UGTdUt0u+y8erxl3lBKvwo/JHyyoEdXjwp4dke9oyZ/g== pretty-format@^29.7.0: version "29.7.0" diff --git a/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java b/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java index 5f2046b1450..fe4c0af06d6 100644 --- a/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java +++ b/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java @@ -194,6 +194,10 @@ class OverrideProcessor implements PreProcessor { if ( ! elementTags.isEmpty()) { // match tags if ( ! elementTags.intersects(tags)) return false; + // Tags are set on instances. Having a tag match for a deployment to a non-prod environment + // disables the usual downscaling of the cluster, which is surprising. We therefore either + // require the tags match to either also match an environment directive, or the implicit prod. + if (elementEnvironments.isEmpty() && environment != Environment.prod) return false; } return true; diff --git a/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java b/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java index 0cdbed3999c..7190b25965f 100644 --- a/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java +++ b/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java @@ -22,7 +22,7 @@ public class HostedOverrideProcessorTagsTest { "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" + " <container id='foo' version='1.0'>" + " <nodes count='5' deploy:tags='a' deploy:environment='perf'/>" + - " <nodes count='10' deploy:tags='a b'/>" + + " <nodes count='10' deploy:tags='a b' deploy:environment='prod dev'/>" + " <nodes count='20' deploy:tags='c'/>" + " <search deploy:tags='b'/>" + " <document-api deploy:tags='d'/>" + @@ -62,6 +62,38 @@ public class HostedOverrideProcessorTagsTest { } @Test + public void testParsingTagATest() throws TransformerException { + String expected = + "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" + + "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" + + " <container id='foo' version='1.0'>" + + " " + // (╭ರ_•́) + " </container>" + + "</services>"; + assertOverride(InstanceName.defaultName(), + Environment.test, + RegionName.defaultName(), + Tags.fromString("a"), + expected); + } + + @Test + public void testParsingTagADev() throws TransformerException { + String expected = + "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" + + "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" + + " <container id='foo' version='1.0'>" + + " <nodes count='10' required='true'/>" + + " </container>" + + "</services>"; + assertOverride(InstanceName.defaultName(), + Environment.dev, + RegionName.defaultName(), + Tags.fromString("a"), + expected); + } + + @Test public void testParsingTagB() throws TransformerException { String expected = "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" + @@ -79,7 +111,7 @@ public class HostedOverrideProcessorTagsTest { } @Test - public void testParsingTagC() throws TransformerException { + public void testParsingTagCProd() throws TransformerException { String expected = "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" + "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" + @@ -95,6 +127,22 @@ public class HostedOverrideProcessorTagsTest { } @Test + public void testParsingTagCDev() throws TransformerException { + String expected = + "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" + + "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" + + " <container id='foo' version='1.0'>" + + " " + // (╭ರ_•́) + " </container>" + + "</services>"; + assertOverride(InstanceName.defaultName(), + Environment.dev, + RegionName.defaultName(), + Tags.fromString("c"), + expected); + } + + @Test public void testParsingTagCAndD() throws TransformerException { String expected = "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" + diff --git a/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java b/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java index c0a677b2094..2cfb6552379 100644 --- a/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java @@ -141,7 +141,7 @@ public class EventRendererTestCase { }); assertFalse(future.isDone()); result = render(new Result(new Query(), newHitGroup(tokenStream, "token_stream"))); - assertTrue(future.isDone()); // Renderer waits for async completion + future.join(); // Renderer waits for async completion } finally { executor.shutdownNow(); diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index 7e04fe314b4..c5f5d8b832f 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -33,7 +33,7 @@ <!-- DO NOT UPGRADE THESE TO A NEW MAJOR VERSION WITHOUT CHECKING FOR BINARY COMPATIBILITY --> <aopalliance.vespa.version>1.0</aopalliance.vespa.version> - <error-prone-annotations.vespa.version>2.27.1</error-prone-annotations.vespa.version> + <error-prone-annotations.vespa.version>2.28.0</error-prone-annotations.vespa.version> <guava.vespa.version>33.2.0-jre</guava.vespa.version> <guice.vespa.version>6.0.0</guice.vespa.version> <j2objc-annotations.vespa.version>3.0.0</j2objc-annotations.vespa.version> @@ -68,7 +68,7 @@ <assertj.vespa.version>3.26.0</assertj.vespa.version> <!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories --> - <aws-sdk.vespa.version>1.12.733</aws-sdk.vespa.version> + <aws-sdk.vespa.version>1.12.734</aws-sdk.vespa.version> <athenz.vespa.version>1.11.59</athenz.vespa.version> <!-- Athenz END --> @@ -178,9 +178,9 @@ <maven-gpg-plugin.vespa.version>3.2.4</maven-gpg-plugin.vespa.version> <maven-install-plugin.vespa.version>3.1.2</maven-install-plugin.vespa.version> <maven-jar-plugin.vespa.version>3.4.1</maven-jar-plugin.vespa.version> - <maven-javadoc-plugin.vespa.version>3.6.3</maven-javadoc-plugin.vespa.version> + <maven-javadoc-plugin.vespa.version>3.7.0</maven-javadoc-plugin.vespa.version> <maven-plugin-api.vespa.version>${maven-core.vespa.version}</maven-plugin-api.vespa.version> - <maven-plugin-tools.vespa.version>3.13.0</maven-plugin-tools.vespa.version> + <maven-plugin-tools.vespa.version>3.13.1</maven-plugin-tools.vespa.version> <maven-resources-plugin.vespa.version>3.3.1</maven-resources-plugin.vespa.version> <maven-resolver.vespa.version>1.9.20</maven-resolver.vespa.version> <maven-shade-plugin.vespa.version>3.5.3</maven-shade-plugin.vespa.version> diff --git a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java index 607e3482c3d..616b9df4bd8 100644 --- a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java +++ b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java @@ -35,6 +35,7 @@ public record User(String email, String name, String nickname, String picture, b } public static Builder builder() { return new Builder(); } + public static Builder builder(User u) { return new Builder(u); } public static class Builder { private String email; @@ -48,6 +49,17 @@ public record User(String email, String name, String nickname, String picture, b private Builder() {} + private Builder(User u) { + email = u.email; + name = u.name; + nickname = u.nickname; + picture = u.picture; + isVerified = u.isVerified; + loginCount = u.loginCount; + lastLogin = u.lastLogin; + extraAttributes.putAll(u.extraAttributes); + } + public Builder email(String email) { this.email = email; return this; } public Builder name(String name) { this.name = name; return this; } public Builder nickname(String nickname) { this.nickname = nickname; return this; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.h b/searchcore/src/vespa/searchcore/proton/matching/match_params.h index 5b58c11b7e1..f9dd55e7bb1 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_params.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.h @@ -27,7 +27,7 @@ struct MatchParams { uint32_t hits_in, bool hasFinalRank, bool needRanking=true); - bool save_rank_scores() const { return ((heapSize + arraySize) != 0); } + bool save_rank_scores() const noexcept { return (arraySize != 0); } bool has_rank_drop_limit() const; }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 211e67f1e2b..6b443231c0a 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -367,7 +367,7 @@ MatchThread::findMatches(MatchTools &tools) tools.give_back_search(ProfiledIterator::profile(*match_profiler, tools.borrow_search())); tools.tag_search_as_changed(); } - HitCollector hits(matchParams.numDocs, matchParams.arraySize); + HitCollector hits(matchParams.numDocs, match_with_ranking ? matchParams.arraySize : 0); trace->addEvent(4, "Start match and first phase rank"); /** * All, or none of the threads in the bundle must execute the match loop. @@ -380,7 +380,7 @@ MatchThread::findMatches(MatchTools &tools) secondPhase(tools, hits); } trace->addEvent(4, "Create result set"); - return hits.getResultSet(fallback_rank_value()); + return hits.getResultSet(); } void diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp index 3ac5f419228..60daa571f1d 100644 --- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp +++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp @@ -13,6 +13,8 @@ using namespace search::fef; using namespace search::queryeval; using ScoreMap = std::map<uint32_t, feature_t>; +using DocidVector = std::vector<uint32_t>; +using RankedHitVector = std::vector<RankedHit>; using Ranges = std::pair<Scores, Scores>; @@ -574,4 +576,83 @@ TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order_only_after_pa checkResult(*rs, nullptr); } +struct RankDropFixture { + uint32_t _docid_limit; + HitCollector _hc; + std::vector<uint32_t> _dropped; + RankDropFixture(uint32_t docid_limit, uint32_t max_hits_size) + : _docid_limit(docid_limit), + _hc(docid_limit, max_hits_size) + { + } + void add(std::vector<RankedHit> hits) { + for (const auto& hit : hits) { + _hc.addHit(hit.getDocId(), hit.getRank()); + } + } + void rerank(ScoreMap score_map, size_t count) { + PredefinedScorer scorer(score_map); + EXPECT_EQ(count, do_reRank(scorer, _hc, count)); + } + std::unique_ptr<BitVector> make_bv(DocidVector docids) { + auto bv = BitVector::create(_docid_limit); + for (auto& docid : docids) { + bv->setBit(docid); + } + return bv; + } + + void setup() { + // Initial 7 hits from first phase + add({{5, 1100},{10, 1200},{11, 1300},{12, 1400},{14, 500},{15, 900},{16,1000}}); + // Rerank two best hits, calculate old and new ranges for reranked + // hits that will cause hits not reranked to later be rescored by + // dividing by 100. + rerank({{11,14},{12,13}}, 2); + } + void check_result(std::optional<double> rank_drop_limit, RankedHitVector exp_array, + std::unique_ptr<BitVector> exp_bv, DocidVector exp_dropped) { + auto rs = _hc.get_result_set(rank_drop_limit, &_dropped); + checkResult(*rs, exp_array); + checkResult(*rs, exp_bv.get()); + EXPECT_EQ(exp_dropped, _dropped); + } +}; + +TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced) +{ + // Track rank score for all 7 hits from first phase + RankDropFixture f(10000, 10); + f.setup(); + f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13},{16,10}}, + {}, {14, 15}); +} + +TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced_when_docid_vector_is_used) +{ + // Track rank score for 4 best hits from first phase, overflow to docid vector + RankDropFixture f(10000, 4); + f.setup(); + f.check_result(13.0, {{11,14}}, + {}, {5,10,12,14,15,16}); +} + +TEST(HitCollectorTest, require_that_bitvector_is_not_dropped_without_second_phase_rank_drop_limit) +{ + // Track rank score for 4 best hits from first phase, overflow to bitvector + RankDropFixture f(20, 4); + f.setup(); + f.check_result(std::nullopt, {{5,11},{10,12},{11,14},{12,13}}, + f.make_bv({5,10,11,12,14,15,16}), {}); +} + +TEST(HitCollectorTest, require_that_bitvector_is_dropped_with_second_phase_rank_drop_limit) +{ + // Track rank for 4 best hits from first phase, overflow to bitvector + RankDropFixture f(20, 4); + f.setup(); + f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13}}, + {}, {14,15,16}); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp index c1d59463ad9..01587ef485a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp @@ -219,71 +219,180 @@ public: } }; -template <typename Rescorer> +class SimpleHitAdder { +protected: + ResultSet& _rs; +public: + SimpleHitAdder(ResultSet& rs) + : _rs(rs) + { + } + void add(uint32_t docid, double rank_value) { + _rs.push_back({docid, rank_value}); + } +}; + +class ConditionalHitAdder : public SimpleHitAdder { +protected: + double _second_phase_rank_drop_limit; +public: + ConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit) + : SimpleHitAdder(rs), + _second_phase_rank_drop_limit(second_phase_rank_drop_limit) + { + } + void add(uint32_t docid, double rank_value) { + if (rank_value > _second_phase_rank_drop_limit) { + _rs.push_back({docid, rank_value}); + } + } +}; + +class TrackingConditionalHitAdder : public ConditionalHitAdder { + std::vector<uint32_t>& _dropped; +public: + TrackingConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit, std::vector<uint32_t>& dropped) + : ConditionalHitAdder(rs, second_phase_rank_drop_limit), + _dropped(dropped) + { + } + void add(uint32_t docid, double rank_value) { + if (rank_value > _second_phase_rank_drop_limit) { + _rs.push_back({docid, rank_value}); + } else { + _dropped.emplace_back(docid); + } + } +}; + +template <typename HitAdder, typename Rescorer> void -add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, Rescorer rescorer) +add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, Rescorer rescorer) { for (auto& hit : hits) { - rs.push_back({hit.first, rescorer.rescore(hit.first, hit.second)}); + hit_adder.add(hit.first, rescorer.rescore(hit.first, hit.second)); } } -template <typename Rescorer> +template <typename HitAdder, typename Rescorer> void -add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer) +add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer) { if (reranked_hits.empty()) { - add_rescored_hits(rs, hits, rescorer); + add_rescored_hits(hit_adder, hits, rescorer); } else { - add_rescored_hits(rs, hits, RerankRescorer(rescorer, reranked_hits)); + add_rescored_hits(hit_adder, hits, RerankRescorer(rescorer, reranked_hits)); } } template <typename Rescorer> void -mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, Rescorer rescorer) +add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer) +{ + if (second_phase_rank_drop_limit.has_value()) { + if (dropped != nullptr) { + add_rescored_hits(TrackingConditionalHitAdder(rs, second_phase_rank_drop_limit.value(), *dropped), hits, reranked_hits, rescorer); + } else { + add_rescored_hits(ConditionalHitAdder(rs, second_phase_rank_drop_limit.value()), hits, reranked_hits, rescorer); + } + } else { + add_rescored_hits(SimpleHitAdder(rs), hits, reranked_hits, rescorer); + } +} + +template <typename HitAdder, typename Rescorer> +void +mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, Rescorer rescorer) { auto hits_cur = hits.begin(); auto hits_end = hits.end(); for (auto docid : docids) { if (hits_cur != hits_end && docid == hits_cur->first) { - rs.push_back({docid, rescorer.rescore(docid, hits_cur->second)}); + hit_adder.add(docid, rescorer.rescore(docid, hits_cur->second)); ++hits_cur; } else { - rs.push_back({docid, default_value}); + hit_adder.add(docid, default_value); } } } -template <typename Rescorer> +template <typename HitAdder, typename Rescorer> void -mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer) +mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer) { if (reranked_hits.empty()) { - mixin_rescored_hits(rs, hits, docids, default_value, rescorer); + mixin_rescored_hits(hit_adder, hits, docids, default_value, rescorer); } else { - mixin_rescored_hits(rs, hits, docids, default_value, RerankRescorer(rescorer, reranked_hits)); + mixin_rescored_hits(hit_adder, hits, docids, default_value, RerankRescorer(rescorer, reranked_hits)); + } +} + +template <typename Rescorer> +void +mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer) +{ + if (second_phase_rank_drop_limit.has_value()) { + if (dropped != nullptr) { + mixin_rescored_hits(TrackingConditionalHitAdder(rs, second_phase_rank_drop_limit.value(), *dropped), hits, docids, default_value, reranked_hits, rescorer); + } else { + mixin_rescored_hits(ConditionalHitAdder(rs, second_phase_rank_drop_limit.value()), hits, docids, default_value, reranked_hits, rescorer); + } + } else { + mixin_rescored_hits(SimpleHitAdder(rs), hits, docids, default_value, reranked_hits, rescorer); + } +} + +void +add_bitvector_to_dropped(std::vector<uint32_t>& dropped, vespalib::ConstArrayRef<RankedHit> hits, const BitVector& bv) +{ + auto hits_cur = hits.begin(); + auto hits_end = hits.end(); + auto docid = bv.getFirstTrueBit(); + auto docid_limit = bv.size(); + while (docid < docid_limit) { + if (hits_cur != hits_end && hits_cur->getDocId() == docid) { + ++hits_cur; + } else { + dropped.emplace_back(docid); + } + docid = bv.getNextTrueBit(docid + 1); } } } std::unique_ptr<ResultSet> -HitCollector::getResultSet(HitRank default_value) +HitCollector::get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped) { + /* + * Use default_rank_value (i.e. -HUGE_VAL) when hit collector saves + * rank scores, otherwise use zero_rank_value (i.e. 0.0). + */ + auto default_value = save_rank_scores() ? search::default_rank_value : search::zero_rank_value; + bool needReScore = FirstPhaseRescorer::need_rescore(_ranges); FirstPhaseRescorer rescorer(_ranges); + if (dropped != nullptr) { + dropped->clear(); + } + // destroys the heap property or score sort order sortHitsByDocId(); auto rs = std::make_unique<ResultSet>(); - if ( ! _collector->isDocIdCollector() ) { + if ( ! _collector->isDocIdCollector() || + (second_phase_rank_drop_limit.has_value() && + (_bitVector || dropped == nullptr))) { rs->allocArray(_hits.size()); + auto* dropped_or_null = dropped; + if (second_phase_rank_drop_limit.has_value() && _bitVector) { + dropped_or_null = nullptr; + } if (needReScore) { - add_rescored_hits(*rs, _hits, _reRankedHits, rescorer); + add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, rescorer); } else { - add_rescored_hits(*rs, _hits, _reRankedHits, NoRescorer()); + add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, NoRescorer()); } } else { if (_unordered) { @@ -291,12 +400,20 @@ HitCollector::getResultSet(HitRank default_value) } rs->allocArray(_docIdVector.size()); if (needReScore) { - mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, rescorer); + mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, rescorer); } else { - mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, NoRescorer()); + mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, NoRescorer()); } } + if (second_phase_rank_drop_limit.has_value() && _bitVector) { + if (dropped != nullptr) { + assert(dropped->empty()); + add_bitvector_to_dropped(*dropped, {rs->getArray(), rs->getArrayUsed()}, *_bitVector); + } + _bitVector.reset(); + } + if (_bitVector) { rs->setBitOverflow(std::move(_bitVector)); } @@ -304,4 +421,10 @@ HitCollector::getResultSet(HitRank default_value) return rs; } +std::unique_ptr<ResultSet> +HitCollector::getResultSet() +{ + return get_result_set(std::nullopt, nullptr); +} + } diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h index 903c2ab5b13..c23fb0a6ef6 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h @@ -8,6 +8,7 @@ #include <vespa/searchlib/common/resultset.h> #include <vespa/vespalib/util/sort.h> #include <algorithm> +#include <optional> #include <vector> namespace search::queryeval { @@ -121,6 +122,8 @@ private: VESPA_DLL_LOCAL void sortHitsByScore(size_t topn); VESPA_DLL_LOCAL void sortHitsByDocId(); + bool save_rank_scores() const noexcept { return _maxHitsSize != 0; } + public: HitCollector(const HitCollector &) = delete; HitCollector &operator=(const HitCollector &) = delete; @@ -164,15 +167,17 @@ public: const std::pair<Scores, Scores> &getRanges() const { return _ranges; } void setRanges(const std::pair<Scores, Scores> &ranges); + std::unique_ptr<ResultSet> + get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped); + /** * Returns a result set based on the content of this collector. * Invoking this method will destroy the heap property of the * ranked hits and the match data heap. * - * @param auto pointer to the result set - * @param default_value rank value to be used for results without rank value + * @return unique pointer to the result set **/ - std::unique_ptr<ResultSet> getResultSet(HitRank default_value = default_rank_value); + std::unique_ptr<ResultSet> getResultSet(); }; } |