diff options
Diffstat (limited to 'client/go/cmd/testdata/src/main/application/schemas')
-rw-r--r-- | client/go/cmd/testdata/src/main/application/schemas/msmarco.sd | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd b/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd new file mode 100644 index 00000000000..183e1a6421f --- /dev/null +++ b/client/go/cmd/testdata/src/main/application/schemas/msmarco.sd @@ -0,0 +1,299 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +schema msmarco { + document msmarco { + + field id type string { + indexing: summary | attribute + } + + field title type string { + indexing: index | summary + index: enable-bm25 + stemming: best + } + + field url type string { + indexing: index | summary + } + + field body type string { + indexing: index | summary + index: enable-bm25 + summary: dynamic + stemming: best + } + + field title_word2vec type tensor<float>(x[500]) { + indexing: attribute + } + + field body_word2vec type tensor<float>(x[500]) { + indexing: attribute + } + + field title_gse type tensor<float>(x[512]) { + indexing: attribute + } + + field body_gse type tensor<float>(x[512]) { + indexing: attribute + } + + field title_bert type tensor<float>(x[768]) { + indexing: attribute + } + + field body_bert type tensor<float>(x[768]) { + indexing: attribute + } + + } + + document-summary minimal { + summary id type string {} + } + + fieldset default { + fields: title, body + } + + rank-profile default { + first-phase { + expression: nativeRank(title, body) + } + } + + rank-profile bm25 inherits default { + first-phase { + expression: bm25(title) + bm25(body) + } + } + + rank-profile word2vec_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile gse_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_word2vec_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_gse_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_bm25_bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: 0.9005951 * bm25(title) + 2.2043643 * bm25(body) + 0.13506432 * dot_product_title() + 0.5840874 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_linear_bm25_gse_title_body_and inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: 0.12408562 * bm25(title) + 0.36673144 * bm25(body) + 6.2273498 * dot_product_title() + 5.671119 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_linear_bm25_gse_title_body_or inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: 0.7150663 * bm25(title) + 0.9480147 * bm25(body) + 1.560068 * dot_product_title() + 1.5062317 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile pointwise_linear_bm25 inherits default { + first-phase { + expression: 0.22499913 * bm25(title) + 0.07596389 * bm25(body) + } + } + + rank-profile listwise_linear_bm25 inherits default { + first-phase { + expression: 0.13446581 * bm25(title) + 0.5716889 * bm25(body) + } + } + + rank-profile collect_rank_features_embeddings inherits default { + function dot_product_title_word2vec() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body_word2vec() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + function dot_product_title_gse() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body_gse() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + function dot_product_title_bert() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body_bert() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: random + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + nativeRank(title) + nativeRank(body) + rankingExpression(dot_product_title_word2vec) + rankingExpression(dot_product_body_word2vec) + rankingExpression(dot_product_title_gse) + rankingExpression(dot_product_body_gse) + rankingExpression(dot_product_title_bert) + rankingExpression(dot_product_body_bert) + } + } + + rank-profile collect_rank_features inherits default { + first-phase { + expression: random + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + nativeRank(title) + nativeRank(body) + } + } +} |