diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-08-24 19:51:59 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-24 19:51:59 +0200 |
commit | fe557fc926fd40305d9f04f9fd749d39cfbc4ea1 (patch) | |
tree | 8bebe5ab2416c4cc52741eca6dab24e895373734 /client/go/cmd/testdata/src | |
parent | 8fe4a6cd8ad9b13d7ac0352854533a641871e2ed (diff) | |
parent | c292201c97b583448212442725e56499ed30b100 (diff) |
Merge pull request #18836 from vespa-engine/bratseth/cli-3
Bratseth/cli 3
Diffstat (limited to 'client/go/cmd/testdata/src')
3 files changed, 368 insertions, 0 deletions
diff --git a/client/go/cmd/testdata/src/main/application/hosts.xml b/client/go/cmd/testdata/src/main/application/hosts.xml new file mode 100644 index 00000000000..5dd3ed0dded --- /dev/null +++ b/client/go/cmd/testdata/src/main/application/hosts.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!-- Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +<hosts> + <host name="localhost"> + <alias>node1</alias> + </host> +</hosts> + diff --git a/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd b/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd new file mode 100644 index 00000000000..183e1a6421f --- /dev/null +++ b/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd @@ -0,0 +1,299 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +schema msmarco { + document msmarco { + + field id type string { + indexing: summary | attribute + } + + field title type string { + indexing: index | summary + index: enable-bm25 + stemming: best + } + + field url type string { + indexing: index | summary + } + + field body type string { + indexing: index | summary + index: enable-bm25 + summary: dynamic + stemming: best + } + + field title_word2vec type tensor<float>(x[500]) { + indexing: attribute + } + + field body_word2vec type tensor<float>(x[500]) { + indexing: attribute + } + + field title_gse type tensor<float>(x[512]) { + indexing: attribute + } + + field body_gse type tensor<float>(x[512]) { + indexing: attribute + } + + field title_bert type tensor<float>(x[768]) { + indexing: attribute + } + + field body_bert type tensor<float>(x[768]) { + indexing: attribute + } + + } + + document-summary minimal { + summary id type string {} + } + + fieldset default { + fields: title, body + } + + rank-profile default { + first-phase { + expression: nativeRank(title, body) + } + } + + rank-profile bm25 inherits default 
{ + first-phase { + expression: bm25(title) + bm25(body) + } + } + + rank-profile word2vec_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile gse_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_word2vec_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_gse_title_body_all 
inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_bm25_bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: 0.9005951 * bm25(title) + 2.2043643 * bm25(body) + 0.13506432 * dot_product_title() + 0.5840874 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_linear_bm25_gse_title_body_and inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: 0.12408562 * bm25(title) + 0.36673144 * bm25(body) + 6.2273498 * dot_product_title() + 5.671119 * dot_product_body() + } + 
ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_linear_bm25_gse_title_body_or inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: 0.7150663 * bm25(title) + 0.9480147 * bm25(body) + 1.560068 * dot_product_title() + 1.5062317 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile pointwise_linear_bm25 inherits default { + first-phase { + expression: 0.22499913 * bm25(title) + 0.07596389 * bm25(body) + } + } + + rank-profile listwise_linear_bm25 inherits default { + first-phase { + expression: 0.13446581 * bm25(title) + 0.5716889 * bm25(body) + } + } + + rank-profile collect_rank_features_embeddings inherits default { + function dot_product_title_word2vec() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body_word2vec() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + function dot_product_title_gse() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body_gse() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + function dot_product_title_bert() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body_bert() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: random + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + nativeRank(title) + nativeRank(body) + rankingExpression(dot_product_title_word2vec) + rankingExpression(dot_product_body_word2vec) + rankingExpression(dot_product_title_gse) + 
rankingExpression(dot_product_body_gse) + rankingExpression(dot_product_title_bert) + rankingExpression(dot_product_body_bert) + } + } + + rank-profile collect_rank_features inherits default { + first-phase { + expression: random + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + nativeRank(title) + nativeRank(body) + } + } +} diff --git a/client/go/cmd/testdata/src/main/application/services.xml b/client/go/cmd/testdata/src/main/application/services.xml new file mode 100644 index 00000000000..766434798f0 --- /dev/null +++ b/client/go/cmd/testdata/src/main/application/services.xml @@ -0,0 +1,61 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> + +<services version="1.0"> + + <container id="text_search" version="1.0"> + <document-api/> + <search> + + <!-- Config for bolding in search result snippets --> + <config name="container.qr-searchers"> + <tag> + <bold> + <open>&lt;strong&gt;</open> + <close>&lt;/strong&gt;</close> + </bold> + <separator>...</separator> + </tag> + </config> + + </search> + <document-processing/> + + <component id="com.yahoo.language.simple.SimpleLinguistics"/> + + <handler id="ai.vespa.example.text_search.site.SiteHandler" bundle="text-search"> + <binding>http://*/site/*</binding> + <binding>http://*/site</binding> + <config name="ai.vespa.example.text_search.site.site-handler"> + <vespaHostName>localhost</vespaHostName> + <vespaHostPort>8080</vespaHostPort> + </config> + </handler> + + <nodes jvmargs="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=*:8998"> + <node hostalias="node1" /> + </nodes> + + </container> + + <content id="msmarco" version="1.0"> + + <!-- Config for search result snippets --> + <config name="vespa.config.search.summary.juniperrc"> + <max_matches>2</max_matches> + <length>1000</length> + <surround_max>500</surround_max> + <min_length>300</min_length> + </config> + + 
<redundancy>2</redundancy> + <documents> + <document type='msmarco' mode="index"/> + <document-processing cluster="text_search"/> + </documents> + <nodes> + <node distribution-key='0' hostalias='node1'/> + </nodes> + </content> + +</services> |