aboutsummaryrefslogtreecommitdiffstats
path: root/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd
diff options
context:
space:
mode:
Diffstat (limited to 'client/go/cmd/testdata/src/main/application/schemas/msmarco.sd')
-rw-r--r--client/go/cmd/testdata/src/main/application/schemas/msmarco.sd299
1 files changed, 299 insertions, 0 deletions
diff --git a/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd b/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd
new file mode 100644
index 00000000000..183e1a6421f
--- /dev/null
+++ b/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd
@@ -0,0 +1,299 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+schema msmarco {
+ document msmarco {
+
+ field id type string {
+ indexing: summary | attribute
+ }
+
+ field title type string {
+ indexing: index | summary
+ index: enable-bm25
+ stemming: best
+ }
+
+ field url type string {
+ indexing: index | summary
+ }
+
+ field body type string {
+ indexing: index | summary
+ index: enable-bm25
+ summary: dynamic
+ stemming: best
+ }
+
+ field title_word2vec type tensor<float>(x[500]) {
+ indexing: attribute
+ }
+
+ field body_word2vec type tensor<float>(x[500]) {
+ indexing: attribute
+ }
+
+ field title_gse type tensor<float>(x[512]) {
+ indexing: attribute
+ }
+
+ field body_gse type tensor<float>(x[512]) {
+ indexing: attribute
+ }
+
+ field title_bert type tensor<float>(x[768]) {
+ indexing: attribute
+ }
+
+ field body_bert type tensor<float>(x[768]) {
+ indexing: attribute
+ }
+
+ }
+
+ document-summary minimal {
+ summary id type string {}
+ }
+
+ fieldset default {
+ fields: title, body
+ }
+
+ rank-profile default {
+ first-phase {
+ expression: nativeRank(title, body)
+ }
+ }
+
+ rank-profile bm25 inherits default {
+ first-phase {
+ expression: bm25(title) + bm25(body)
+ }
+ }
+
+ rank-profile word2vec_title_body_all inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor)*attribute(title_word2vec))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor)*attribute(body_word2vec))
+ }
+ first-phase {
+ expression: dot_product_title() + dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile gse_title_body_all inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor_gse)*attribute(title_gse))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor_gse)*attribute(body_gse))
+ }
+ first-phase {
+ expression: dot_product_title() + dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile bert_title_body_all inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor_bert)*attribute(title_bert))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor_bert)*attribute(body_bert))
+ }
+ first-phase {
+ expression: dot_product_title() + dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile bm25_word2vec_title_body_all inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor)*attribute(title_word2vec))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor)*attribute(body_word2vec))
+ }
+ first-phase {
+ expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile bm25_gse_title_body_all inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor_gse)*attribute(title_gse))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor_gse)*attribute(body_gse))
+ }
+ first-phase {
+ expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile bm25_bert_title_body_all inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor_bert)*attribute(title_bert))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor_bert)*attribute(body_bert))
+ }
+ first-phase {
+ expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile listwise_bm25_bert_title_body_all inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor_bert)*attribute(title_bert))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor_bert)*attribute(body_bert))
+ }
+ first-phase {
+ expression: 0.9005951 * bm25(title) + 2.2043643 * bm25(body) + 0.13506432 * dot_product_title() + 0.5840874 * dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile listwise_linear_bm25_gse_title_body_and inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor_gse)*attribute(title_gse))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor_gse)*attribute(body_gse))
+ }
+ first-phase {
+ expression: 0.12408562 * bm25(title) + 0.36673144 * bm25(body) + 6.2273498 * dot_product_title() + 5.671119 * dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile listwise_linear_bm25_gse_title_body_or inherits default {
+ function dot_product_title() {
+ expression: sum(query(tensor_gse)*attribute(title_gse))
+ }
+ function dot_product_body() {
+ expression: sum(query(tensor_gse)*attribute(body_gse))
+ }
+ first-phase {
+ expression: 0.7150663 * bm25(title) + 0.9480147 * bm25(body) + 1.560068 * dot_product_title() + 1.5062317 * dot_product_body()
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ rankingExpression(dot_product_title)
+ rankingExpression(dot_product_body)
+ }
+ }
+
+ rank-profile pointwise_linear_bm25 inherits default {
+ first-phase {
+ expression: 0.22499913 * bm25(title) + 0.07596389 * bm25(body)
+ }
+ }
+
+ rank-profile listwise_linear_bm25 inherits default {
+ first-phase {
+ expression: 0.13446581 * bm25(title) + 0.5716889 * bm25(body)
+ }
+ }
+
+ rank-profile collect_rank_features_embeddings inherits default {
+ function dot_product_title_word2vec() {
+ expression: sum(query(tensor)*attribute(title_word2vec))
+ }
+ function dot_product_body_word2vec() {
+ expression: sum(query(tensor)*attribute(body_word2vec))
+ }
+ function dot_product_title_gse() {
+ expression: sum(query(tensor_gse)*attribute(title_gse))
+ }
+ function dot_product_body_gse() {
+ expression: sum(query(tensor_gse)*attribute(body_gse))
+ }
+ function dot_product_title_bert() {
+ expression: sum(query(tensor_bert)*attribute(title_bert))
+ }
+ function dot_product_body_bert() {
+ expression: sum(query(tensor_bert)*attribute(body_bert))
+ }
+ first-phase {
+ expression: random
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ nativeRank(title)
+ nativeRank(body)
+ rankingExpression(dot_product_title_word2vec)
+ rankingExpression(dot_product_body_word2vec)
+ rankingExpression(dot_product_title_gse)
+ rankingExpression(dot_product_body_gse)
+ rankingExpression(dot_product_title_bert)
+ rankingExpression(dot_product_body_bert)
+ }
+ }
+
+ rank-profile collect_rank_features inherits default {
+ first-phase {
+ expression: random
+ }
+ ignore-default-rank-features
+ rank-features {
+ bm25(title)
+ bm25(body)
+ nativeRank(title)
+ nativeRank(body)
+ }
+ }
+}