aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnstein Ressem <aressem@vespa.ai>2024-06-03 09:18:19 +0200
committerArnstein Ressem <aressem@vespa.ai>2024-06-03 09:18:19 +0200
commit6e455c397bb6966049ed5f8691e60c87d40c51bb (patch)
tree3b83b5ae53accab06de07d858eeb64712336c802
parent38ae0c486f951b78d1c7b128c8189fe8e5684a0c (diff)
parent8dc3cc1a3f2af0158a3b85098433dfd10129ede0 (diff)
Merge branch 'master' into aressem/test-valgrind
-rw-r--r--client/js/app/yarn.lock61
-rw-r--r--config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java4
-rw-r--r--config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java52
-rw-r--r--container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java2
-rw-r--r--dependency-versions/pom.xml8
-rw-r--r--jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java12
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_params.h2
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp4
-rw-r--r--searchlib/src/tests/hitcollector/hitcollector_test.cpp81
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp163
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.h11
11 files changed, 340 insertions, 60 deletions
diff --git a/client/js/app/yarn.lock b/client/js/app/yarn.lock
index c0a1c8be5d3..72d9d848d27 100644
--- a/client/js/app/yarn.lock
+++ b/client/js/app/yarn.lock
@@ -830,6 +830,15 @@
resolved "https://registry.yarnpkg.com/@eslint-community/regexpp/-/regexpp-4.10.0.tgz#548f6de556857c8bb73bbee70c35dc82a2e74d63"
integrity sha512-Cu96Sd2By9mCNTx2iyKOmq10v22jUVQv0lQnlGNy16oE9589yE+QADPbrMGCkA51cKZSg3Pu/aTJVTGfL/qjUA==
+"@eslint/config-array@^0.15.1":
+ version "0.15.1"
+ resolved "https://registry.yarnpkg.com/@eslint/config-array/-/config-array-0.15.1.tgz#1fa78b422d98f4e7979f2211a1fde137e26c7d61"
+ integrity sha512-K4gzNq+yymn/EVsXYmf+SBcBro8MTf+aXJZUphM96CdzUEr+ClGDvAbpmaEK+cGVigVXIgs9gNmvHAlrzzY5JQ==
+ dependencies:
+ "@eslint/object-schema" "^2.1.3"
+ debug "^4.3.1"
+ minimatch "^3.0.5"
+
"@eslint/eslintrc@^3.1.0":
version "3.1.0"
resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-3.1.0.tgz#dbd3482bfd91efa663cbe7aa1f506839868207b6"
@@ -845,10 +854,15 @@
minimatch "^3.1.2"
strip-json-comments "^3.1.1"
-"@eslint/js@9.3.0":
- version "9.3.0"
- resolved "https://registry.yarnpkg.com/@eslint/js/-/js-9.3.0.tgz#2e8f65c9c55227abc4845b1513c69c32c679d8fe"
- integrity sha512-niBqk8iwv96+yuTwjM6bWg8ovzAPF9qkICsGtcoa5/dmqcEMfdwNAX7+/OHcJHc7wj7XqPxH98oAHytFYlw6Sw==
+"@eslint/js@9.4.0":
+ version "9.4.0"
+ resolved "https://registry.yarnpkg.com/@eslint/js/-/js-9.4.0.tgz#96a2edd37ec0551ce5f9540705be23951c008a0c"
+ integrity sha512-fdI7VJjP3Rvc70lC4xkFXHB0fiPeojiL1PxVG6t1ZvXQrarj893PweuBTujxDUFk0Fxj4R7PIIAZ/aiiyZPZcg==
+
+"@eslint/object-schema@^2.1.3":
+ version "2.1.3"
+ resolved "https://registry.yarnpkg.com/@eslint/object-schema/-/object-schema-2.1.3.tgz#e65ae80ee2927b4fd8c5c26b15ecacc2b2a6cc2a"
+ integrity sha512-HAbhAYKfsAC2EkTqve00ibWIZlaU74Z1EHwAjYr4PXF0YU2VEA1zSIKSSpKszRLRWwHzzRZXvK632u+uXzvsvw==
"@floating-ui/core@^1.4.2":
version "1.5.0"
@@ -919,25 +933,11 @@
dependencies:
prop-types "^15.8.1"
-"@humanwhocodes/config-array@^0.13.0":
- version "0.13.0"
- resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.13.0.tgz#fb907624df3256d04b9aa2df50d7aa97ec648748"
- integrity sha512-DZLEEqFWQFiyK6h5YIeynKx7JlvCYWL0cImfSRXZ9l4Sg2efkFGTuFf6vzXjK1cq6IYkU+Eg/JizXw+TD2vRNw==
- dependencies:
- "@humanwhocodes/object-schema" "^2.0.3"
- debug "^4.3.1"
- minimatch "^3.0.5"
-
"@humanwhocodes/module-importer@^1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz#af5b2691a22b44be847b0ca81641c5fb6ad0172c"
integrity sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==
-"@humanwhocodes/object-schema@^2.0.3":
- version "2.0.3"
- resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz#4a2868d75d6d6963e423bcf90b7fd1be343409d3"
- integrity sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==
-
"@humanwhocodes/retry@^0.3.0":
version "0.3.0"
resolved "https://registry.yarnpkg.com/@humanwhocodes/retry/-/retry-0.3.0.tgz#6d86b8cb322660f03d3f0aa94b99bdd8e172d570"
@@ -2324,13 +2324,20 @@ debug@^3.2.7:
dependencies:
ms "^2.1.1"
-debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2:
+debug@^4.1.0, debug@^4.1.1:
version "4.3.4"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
+debug@^4.3.1, debug@^4.3.2:
+ version "4.3.5"
+ resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.5.tgz#e83444eceb9fedd4a1da56d671ae2446a01a6e1e"
+ integrity sha512-pt0bNEmneDIvdL1Xsd9oDQ/wrQRkXDT4AUWlNZNPKvW5x/jyO9VFXkJUP07vQ2upmw5PlaITaPKc31jK13V+jg==
+ dependencies:
+ ms "2.1.2"
+
decode-uri-component@^0.2.0:
version "0.2.2"
resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.2.tgz#e69dbe25d37941171dd540e024c444cd5188e1e9"
@@ -2736,15 +2743,15 @@ eslint-visitor-keys@^4.0.0:
integrity sha512-OtIRv/2GyiF6o/d8K7MYKKbXrOUBIK6SfkIRM4Z0dY3w+LiQ0vy3F57m0Z71bjbyeiWFiHJ8brqnmE6H6/jEuw==
eslint@^9.0.0:
- version "9.3.0"
- resolved "https://registry.yarnpkg.com/eslint/-/eslint-9.3.0.tgz#36a96db84592618d6ed9074d677e92f4e58c08b9"
- integrity sha512-5Iv4CsZW030lpUqHBapdPo3MJetAPtejVW8B84GIcIIv8+ohFaddXsrn1Gn8uD9ijDb+kcYKFUVmC8qG8B2ORQ==
+ version "9.4.0"
+ resolved "https://registry.yarnpkg.com/eslint/-/eslint-9.4.0.tgz#79150c3610ae606eb131f1d648d5f43b3d45f3cd"
+ integrity sha512-sjc7Y8cUD1IlwYcTS9qPSvGjAC8Ne9LctpxKKu3x/1IC9bnOg98Zy6GxEJUfr1NojMgVPlyANXYns8oE2c1TAA==
dependencies:
"@eslint-community/eslint-utils" "^4.2.0"
"@eslint-community/regexpp" "^4.6.1"
+ "@eslint/config-array" "^0.15.1"
"@eslint/eslintrc" "^3.1.0"
- "@eslint/js" "9.3.0"
- "@humanwhocodes/config-array" "^0.13.0"
+ "@eslint/js" "9.4.0"
"@humanwhocodes/module-importer" "^1.0.1"
"@humanwhocodes/retry" "^0.3.0"
"@nodelib/fs.walk" "^1.2.8"
@@ -4721,9 +4728,9 @@ prettier-linter-helpers@^1.0.0:
fast-diff "^1.1.2"
prettier@3:
- version "3.2.5"
- resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.2.5.tgz#e52bc3090586e824964a8813b09aba6233b28368"
- integrity sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A==
+ version "3.3.0"
+ resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.3.0.tgz#d173ea0524a691d4c0b1181752f2b46724328cdf"
+ integrity sha512-J9odKxERhCQ10OC2yb93583f6UnYutOeiV5i0zEDS7UGTdUt0u+y8erxl3lBKvwo/JHyyoEdXjwp4dke9oyZ/g==
pretty-format@^29.7.0:
version "29.7.0"
diff --git a/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java b/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java
index 5f2046b1450..fe4c0af06d6 100644
--- a/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java
+++ b/config-application-package/src/main/java/com/yahoo/config/application/OverrideProcessor.java
@@ -194,6 +194,10 @@ class OverrideProcessor implements PreProcessor {
if ( ! elementTags.isEmpty()) { // match tags
if ( ! elementTags.intersects(tags)) return false;
+ // Tags are set on instances. Having a tag match for a deployment to a non-prod environment
+ // disables the usual downscaling of the cluster, which is surprising. We therefore either
+ // require the tags match to either also match an environment directive, or the implicit prod.
+ if (elementEnvironments.isEmpty() && environment != Environment.prod) return false;
}
return true;
diff --git a/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java b/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java
index 0cdbed3999c..7190b25965f 100644
--- a/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java
+++ b/config-application-package/src/test/java/com/yahoo/config/application/HostedOverrideProcessorTagsTest.java
@@ -22,7 +22,7 @@ public class HostedOverrideProcessorTagsTest {
"<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" +
" <container id='foo' version='1.0'>" +
" <nodes count='5' deploy:tags='a' deploy:environment='perf'/>" +
- " <nodes count='10' deploy:tags='a b'/>" +
+ " <nodes count='10' deploy:tags='a b' deploy:environment='prod dev'/>" +
" <nodes count='20' deploy:tags='c'/>" +
" <search deploy:tags='b'/>" +
" <document-api deploy:tags='d'/>" +
@@ -62,6 +62,38 @@ public class HostedOverrideProcessorTagsTest {
}
@Test
+ public void testParsingTagATest() throws TransformerException {
+ String expected =
+ "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" +
+ "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" +
+ " <container id='foo' version='1.0'>" +
+ " " + // (╭ರ_•́)
+ " </container>" +
+ "</services>";
+ assertOverride(InstanceName.defaultName(),
+ Environment.test,
+ RegionName.defaultName(),
+ Tags.fromString("a"),
+ expected);
+ }
+
+ @Test
+ public void testParsingTagADev() throws TransformerException {
+ String expected =
+ "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" +
+ "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" +
+ " <container id='foo' version='1.0'>" +
+ " <nodes count='10' required='true'/>" +
+ " </container>" +
+ "</services>";
+ assertOverride(InstanceName.defaultName(),
+ Environment.dev,
+ RegionName.defaultName(),
+ Tags.fromString("a"),
+ expected);
+ }
+
+ @Test
public void testParsingTagB() throws TransformerException {
String expected =
"<?xml version='1.0' encoding='UTF-8' standalone='no'?>" +
@@ -79,7 +111,7 @@ public class HostedOverrideProcessorTagsTest {
}
@Test
- public void testParsingTagC() throws TransformerException {
+ public void testParsingTagCProd() throws TransformerException {
String expected =
"<?xml version='1.0' encoding='UTF-8' standalone='no'?>" +
"<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" +
@@ -95,6 +127,22 @@ public class HostedOverrideProcessorTagsTest {
}
@Test
+ public void testParsingTagCDev() throws TransformerException {
+ String expected =
+ "<?xml version='1.0' encoding='UTF-8' standalone='no'?>" +
+ "<services xmlns:deploy='vespa' xmlns:preprocess='?' version='1.0'>" +
+ " <container id='foo' version='1.0'>" +
+ " " + // (╭ರ_•́)
+ " </container>" +
+ "</services>";
+ assertOverride(InstanceName.defaultName(),
+ Environment.dev,
+ RegionName.defaultName(),
+ Tags.fromString("c"),
+ expected);
+ }
+
+ @Test
public void testParsingTagCAndD() throws TransformerException {
String expected =
"<?xml version='1.0' encoding='UTF-8' standalone='no'?>" +
diff --git a/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java b/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java
index c0a677b2094..2cfb6552379 100644
--- a/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/rendering/EventRendererTestCase.java
@@ -141,7 +141,7 @@ public class EventRendererTestCase {
});
assertFalse(future.isDone());
result = render(new Result(new Query(), newHitGroup(tokenStream, "token_stream")));
- assertTrue(future.isDone()); // Renderer waits for async completion
+ future.join(); // Renderer waits for async completion
} finally {
executor.shutdownNow();
diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml
index 7e04fe314b4..c5f5d8b832f 100644
--- a/dependency-versions/pom.xml
+++ b/dependency-versions/pom.xml
@@ -33,7 +33,7 @@
<!-- DO NOT UPGRADE THESE TO A NEW MAJOR VERSION WITHOUT CHECKING FOR BINARY COMPATIBILITY -->
<aopalliance.vespa.version>1.0</aopalliance.vespa.version>
- <error-prone-annotations.vespa.version>2.27.1</error-prone-annotations.vespa.version>
+ <error-prone-annotations.vespa.version>2.28.0</error-prone-annotations.vespa.version>
<guava.vespa.version>33.2.0-jre</guava.vespa.version>
<guice.vespa.version>6.0.0</guice.vespa.version>
<j2objc-annotations.vespa.version>3.0.0</j2objc-annotations.vespa.version>
@@ -68,7 +68,7 @@
<assertj.vespa.version>3.26.0</assertj.vespa.version>
<!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories -->
- <aws-sdk.vespa.version>1.12.733</aws-sdk.vespa.version>
+ <aws-sdk.vespa.version>1.12.734</aws-sdk.vespa.version>
<athenz.vespa.version>1.11.59</athenz.vespa.version>
<!-- Athenz END -->
@@ -178,9 +178,9 @@
<maven-gpg-plugin.vespa.version>3.2.4</maven-gpg-plugin.vespa.version>
<maven-install-plugin.vespa.version>3.1.2</maven-install-plugin.vespa.version>
<maven-jar-plugin.vespa.version>3.4.1</maven-jar-plugin.vespa.version>
- <maven-javadoc-plugin.vespa.version>3.6.3</maven-javadoc-plugin.vespa.version>
+ <maven-javadoc-plugin.vespa.version>3.7.0</maven-javadoc-plugin.vespa.version>
<maven-plugin-api.vespa.version>${maven-core.vespa.version}</maven-plugin-api.vespa.version>
- <maven-plugin-tools.vespa.version>3.13.0</maven-plugin-tools.vespa.version>
+ <maven-plugin-tools.vespa.version>3.13.1</maven-plugin-tools.vespa.version>
<maven-resources-plugin.vespa.version>3.3.1</maven-resources-plugin.vespa.version>
<maven-resolver.vespa.version>1.9.20</maven-resolver.vespa.version>
<maven-shade-plugin.vespa.version>3.5.3</maven-shade-plugin.vespa.version>
diff --git a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java
index 607e3482c3d..616b9df4bd8 100644
--- a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java
+++ b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/User.java
@@ -35,6 +35,7 @@ public record User(String email, String name, String nickname, String picture, b
}
public static Builder builder() { return new Builder(); }
+ public static Builder builder(User u) { return new Builder(u); }
public static class Builder {
private String email;
@@ -48,6 +49,17 @@ public record User(String email, String name, String nickname, String picture, b
private Builder() {}
+ private Builder(User u) {
+ email = u.email;
+ name = u.name;
+ nickname = u.nickname;
+ picture = u.picture;
+ isVerified = u.isVerified;
+ loginCount = u.loginCount;
+ lastLogin = u.lastLogin;
+ extraAttributes.putAll(u.extraAttributes);
+ }
+
public Builder email(String email) { this.email = email; return this; }
public Builder name(String name) { this.name = name; return this; }
public Builder nickname(String nickname) { this.nickname = nickname; return this; }
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.h b/searchcore/src/vespa/searchcore/proton/matching/match_params.h
index 5b58c11b7e1..f9dd55e7bb1 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_params.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.h
@@ -27,7 +27,7 @@ struct MatchParams {
uint32_t hits_in,
bool hasFinalRank,
bool needRanking=true);
- bool save_rank_scores() const { return ((heapSize + arraySize) != 0); }
+ bool save_rank_scores() const noexcept { return (arraySize != 0); }
bool has_rank_drop_limit() const;
};
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index 211e67f1e2b..6b443231c0a 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -367,7 +367,7 @@ MatchThread::findMatches(MatchTools &tools)
tools.give_back_search(ProfiledIterator::profile(*match_profiler, tools.borrow_search()));
tools.tag_search_as_changed();
}
- HitCollector hits(matchParams.numDocs, matchParams.arraySize);
+ HitCollector hits(matchParams.numDocs, match_with_ranking ? matchParams.arraySize : 0);
trace->addEvent(4, "Start match and first phase rank");
/**
* All, or none of the threads in the bundle must execute the match loop.
@@ -380,7 +380,7 @@ MatchThread::findMatches(MatchTools &tools)
secondPhase(tools, hits);
}
trace->addEvent(4, "Create result set");
- return hits.getResultSet(fallback_rank_value());
+ return hits.getResultSet();
}
void
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
index 3ac5f419228..60daa571f1d 100644
--- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp
+++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
@@ -13,6 +13,8 @@ using namespace search::fef;
using namespace search::queryeval;
using ScoreMap = std::map<uint32_t, feature_t>;
+using DocidVector = std::vector<uint32_t>;
+using RankedHitVector = std::vector<RankedHit>;
using Ranges = std::pair<Scores, Scores>;
@@ -574,4 +576,83 @@ TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order_only_after_pa
checkResult(*rs, nullptr);
}
+struct RankDropFixture {
+ uint32_t _docid_limit;
+ HitCollector _hc;
+ std::vector<uint32_t> _dropped;
+ RankDropFixture(uint32_t docid_limit, uint32_t max_hits_size)
+ : _docid_limit(docid_limit),
+ _hc(docid_limit, max_hits_size)
+ {
+ }
+ void add(std::vector<RankedHit> hits) {
+ for (const auto& hit : hits) {
+ _hc.addHit(hit.getDocId(), hit.getRank());
+ }
+ }
+ void rerank(ScoreMap score_map, size_t count) {
+ PredefinedScorer scorer(score_map);
+ EXPECT_EQ(count, do_reRank(scorer, _hc, count));
+ }
+ std::unique_ptr<BitVector> make_bv(DocidVector docids) {
+ auto bv = BitVector::create(_docid_limit);
+ for (auto& docid : docids) {
+ bv->setBit(docid);
+ }
+ return bv;
+ }
+
+ void setup() {
+ // Initial 7 hits from first phase
+ add({{5, 1100},{10, 1200},{11, 1300},{12, 1400},{14, 500},{15, 900},{16,1000}});
+ // Rerank two best hits, calculate old and new ranges for reranked
+ // hits that will cause hits not reranked to later be rescored by
+ // dividing by 100.
+ rerank({{11,14},{12,13}}, 2);
+ }
+ void check_result(std::optional<double> rank_drop_limit, RankedHitVector exp_array,
+ std::unique_ptr<BitVector> exp_bv, DocidVector exp_dropped) {
+ auto rs = _hc.get_result_set(rank_drop_limit, &_dropped);
+ checkResult(*rs, exp_array);
+ checkResult(*rs, exp_bv.get());
+ EXPECT_EQ(exp_dropped, _dropped);
+ }
+};
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced)
+{
+ // Track rank score for all 7 hits from first phase
+ RankDropFixture f(10000, 10);
+ f.setup();
+ f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13},{16,10}},
+ {}, {14, 15});
+}
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced_when_docid_vector_is_used)
+{
+ // Track rank score for 4 best hits from first phase, overflow to docid vector
+ RankDropFixture f(10000, 4);
+ f.setup();
+ f.check_result(13.0, {{11,14}},
+ {}, {5,10,12,14,15,16});
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_not_dropped_without_second_phase_rank_drop_limit)
+{
+ // Track rank score for 4 best hits from first phase, overflow to bitvector
+ RankDropFixture f(20, 4);
+ f.setup();
+ f.check_result(std::nullopt, {{5,11},{10,12},{11,14},{12,13}},
+ f.make_bv({5,10,11,12,14,15,16}), {});
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_dropped_with_second_phase_rank_drop_limit)
+{
+ // Track rank for 4 best hits from first phase, overflow to bitvector
+ RankDropFixture f(20, 4);
+ f.setup();
+ f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13}},
+ {}, {14,15,16});
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
index c1d59463ad9..01587ef485a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
@@ -219,71 +219,180 @@ public:
}
};
-template <typename Rescorer>
+class SimpleHitAdder {
+protected:
+ ResultSet& _rs;
+public:
+ SimpleHitAdder(ResultSet& rs)
+ : _rs(rs)
+ {
+ }
+ void add(uint32_t docid, double rank_value) {
+ _rs.push_back({docid, rank_value});
+ }
+};
+
+class ConditionalHitAdder : public SimpleHitAdder {
+protected:
+ double _second_phase_rank_drop_limit;
+public:
+ ConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit)
+ : SimpleHitAdder(rs),
+ _second_phase_rank_drop_limit(second_phase_rank_drop_limit)
+ {
+ }
+ void add(uint32_t docid, double rank_value) {
+ if (rank_value > _second_phase_rank_drop_limit) {
+ _rs.push_back({docid, rank_value});
+ }
+ }
+};
+
+class TrackingConditionalHitAdder : public ConditionalHitAdder {
+ std::vector<uint32_t>& _dropped;
+public:
+ TrackingConditionalHitAdder(ResultSet& rs, double second_phase_rank_drop_limit, std::vector<uint32_t>& dropped)
+ : ConditionalHitAdder(rs, second_phase_rank_drop_limit),
+ _dropped(dropped)
+ {
+ }
+ void add(uint32_t docid, double rank_value) {
+ if (rank_value > _second_phase_rank_drop_limit) {
+ _rs.push_back({docid, rank_value});
+ } else {
+ _dropped.emplace_back(docid);
+ }
+ }
+};
+
+template <typename HitAdder, typename Rescorer>
void
-add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, Rescorer rescorer)
+add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, Rescorer rescorer)
{
for (auto& hit : hits) {
- rs.push_back({hit.first, rescorer.rescore(hit.first, hit.second)});
+ hit_adder.add(hit.first, rescorer.rescore(hit.first, hit.second));
}
}
-template <typename Rescorer>
+template <typename HitAdder, typename Rescorer>
void
-add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
+add_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
{
if (reranked_hits.empty()) {
- add_rescored_hits(rs, hits, rescorer);
+ add_rescored_hits(hit_adder, hits, rescorer);
} else {
- add_rescored_hits(rs, hits, RerankRescorer(rescorer, reranked_hits));
+ add_rescored_hits(hit_adder, hits, RerankRescorer(rescorer, reranked_hits));
}
}
template <typename Rescorer>
void
-mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, Rescorer rescorer)
+add_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer)
+{
+ if (second_phase_rank_drop_limit.has_value()) {
+ if (dropped != nullptr) {
+ add_rescored_hits(TrackingConditionalHitAdder(rs, second_phase_rank_drop_limit.value(), *dropped), hits, reranked_hits, rescorer);
+ } else {
+ add_rescored_hits(ConditionalHitAdder(rs, second_phase_rank_drop_limit.value()), hits, reranked_hits, rescorer);
+ }
+ } else {
+ add_rescored_hits(SimpleHitAdder(rs), hits, reranked_hits, rescorer);
+ }
+}
+
+template <typename HitAdder, typename Rescorer>
+void
+mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, Rescorer rescorer)
{
auto hits_cur = hits.begin();
auto hits_end = hits.end();
for (auto docid : docids) {
if (hits_cur != hits_end && docid == hits_cur->first) {
- rs.push_back({docid, rescorer.rescore(docid, hits_cur->second)});
+ hit_adder.add(docid, rescorer.rescore(docid, hits_cur->second));
++hits_cur;
} else {
- rs.push_back({docid, default_value});
+ hit_adder.add(docid, default_value);
}
}
}
-template <typename Rescorer>
+template <typename HitAdder, typename Rescorer>
void
-mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
+mixin_rescored_hits(HitAdder hit_adder, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, Rescorer rescorer)
{
if (reranked_hits.empty()) {
- mixin_rescored_hits(rs, hits, docids, default_value, rescorer);
+ mixin_rescored_hits(hit_adder, hits, docids, default_value, rescorer);
} else {
- mixin_rescored_hits(rs, hits, docids, default_value, RerankRescorer(rescorer, reranked_hits));
+ mixin_rescored_hits(hit_adder, hits, docids, default_value, RerankRescorer(rescorer, reranked_hits));
+ }
+}
+
+template <typename Rescorer>
+void
+mixin_rescored_hits(ResultSet& rs, const std::vector<HitCollector::Hit>& hits, const std::vector<uint32_t>& docids, double default_value, const std::vector<HitCollector::Hit>& reranked_hits, std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped, Rescorer rescorer)
+{
+ if (second_phase_rank_drop_limit.has_value()) {
+ if (dropped != nullptr) {
+ mixin_rescored_hits(TrackingConditionalHitAdder(rs, second_phase_rank_drop_limit.value(), *dropped), hits, docids, default_value, reranked_hits, rescorer);
+ } else {
+ mixin_rescored_hits(ConditionalHitAdder(rs, second_phase_rank_drop_limit.value()), hits, docids, default_value, reranked_hits, rescorer);
+ }
+ } else {
+ mixin_rescored_hits(SimpleHitAdder(rs), hits, docids, default_value, reranked_hits, rescorer);
+ }
+}
+
+void
+add_bitvector_to_dropped(std::vector<uint32_t>& dropped, vespalib::ConstArrayRef<RankedHit> hits, const BitVector& bv)
+{
+ auto hits_cur = hits.begin();
+ auto hits_end = hits.end();
+ auto docid = bv.getFirstTrueBit();
+ auto docid_limit = bv.size();
+ while (docid < docid_limit) {
+ if (hits_cur != hits_end && hits_cur->getDocId() == docid) {
+ ++hits_cur;
+ } else {
+ dropped.emplace_back(docid);
+ }
+ docid = bv.getNextTrueBit(docid + 1);
}
}
}
std::unique_ptr<ResultSet>
-HitCollector::getResultSet(HitRank default_value)
+HitCollector::get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped)
{
+ /*
+ * Use default_rank_value (i.e. -HUGE_VAL) when hit collector saves
+ * rank scores, otherwise use zero_rank_value (i.e. 0.0).
+ */
+ auto default_value = save_rank_scores() ? search::default_rank_value : search::zero_rank_value;
+
bool needReScore = FirstPhaseRescorer::need_rescore(_ranges);
FirstPhaseRescorer rescorer(_ranges);
+ if (dropped != nullptr) {
+ dropped->clear();
+ }
+
// destroys the heap property or score sort order
sortHitsByDocId();
auto rs = std::make_unique<ResultSet>();
- if ( ! _collector->isDocIdCollector() ) {
+ if ( ! _collector->isDocIdCollector() ||
+ (second_phase_rank_drop_limit.has_value() &&
+ (_bitVector || dropped == nullptr))) {
rs->allocArray(_hits.size());
+ auto* dropped_or_null = dropped;
+ if (second_phase_rank_drop_limit.has_value() && _bitVector) {
+ dropped_or_null = nullptr;
+ }
if (needReScore) {
- add_rescored_hits(*rs, _hits, _reRankedHits, rescorer);
+ add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, rescorer);
} else {
- add_rescored_hits(*rs, _hits, _reRankedHits, NoRescorer());
+ add_rescored_hits(*rs, _hits, _reRankedHits, second_phase_rank_drop_limit, dropped_or_null, NoRescorer());
}
} else {
if (_unordered) {
@@ -291,12 +400,20 @@ HitCollector::getResultSet(HitRank default_value)
}
rs->allocArray(_docIdVector.size());
if (needReScore) {
- mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, rescorer);
+ mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, rescorer);
} else {
- mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, NoRescorer());
+ mixin_rescored_hits(*rs, _hits, _docIdVector, default_value, _reRankedHits, second_phase_rank_drop_limit, dropped, NoRescorer());
}
}
+ if (second_phase_rank_drop_limit.has_value() && _bitVector) {
+ if (dropped != nullptr) {
+ assert(dropped->empty());
+ add_bitvector_to_dropped(*dropped, {rs->getArray(), rs->getArrayUsed()}, *_bitVector);
+ }
+ _bitVector.reset();
+ }
+
if (_bitVector) {
rs->setBitOverflow(std::move(_bitVector));
}
@@ -304,4 +421,10 @@ HitCollector::getResultSet(HitRank default_value)
return rs;
}
+std::unique_ptr<ResultSet>
+HitCollector::getResultSet()
+{
+ return get_result_set(std::nullopt, nullptr);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
index 903c2ab5b13..c23fb0a6ef6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
@@ -8,6 +8,7 @@
#include <vespa/searchlib/common/resultset.h>
#include <vespa/vespalib/util/sort.h>
#include <algorithm>
+#include <optional>
#include <vector>
namespace search::queryeval {
@@ -121,6 +122,8 @@ private:
VESPA_DLL_LOCAL void sortHitsByScore(size_t topn);
VESPA_DLL_LOCAL void sortHitsByDocId();
+ bool save_rank_scores() const noexcept { return _maxHitsSize != 0; }
+
public:
HitCollector(const HitCollector &) = delete;
HitCollector &operator=(const HitCollector &) = delete;
@@ -164,15 +167,17 @@ public:
const std::pair<Scores, Scores> &getRanges() const { return _ranges; }
void setRanges(const std::pair<Scores, Scores> &ranges);
+ std::unique_ptr<ResultSet>
+ get_result_set(std::optional<double> second_phase_rank_drop_limit, std::vector<uint32_t>* dropped);
+
/**
* Returns a result set based on the content of this collector.
* Invoking this method will destroy the heap property of the
* ranked hits and the match data heap.
*
- * @param auto pointer to the result set
- * @param default_value rank value to be used for results without rank value
+ * @return unique pointer to the result set
**/
- std::unique_ptr<ResultSet> getResultSet(HitRank default_value = default_rank_value);
+ std::unique_ptr<ResultSet> getResultSet();
};
}