summary refs log tree commit diff stats
path: root/config-model/src/test/derived/documentderiver/newssummary.sd
blob: c0fcf8c2d85dedaa13362071984bf163db35367b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
search newssummary {

  document newssummary {

    # String field `title`.
    # The value is added to the document summary and indexed; stemming is disabled.
    # The field can also be referred to by the alias `headline`.
    field title type string {
      indexing: summary | index
      # The index-to line below is commented out and therefore inactive.
      # index-to: title, titleabstract, default
      stemming: none
      alias: headline
    }

    # String field `abstract`.
    # The value is added to the document summary and indexed; stemming is disabled.
    field abstract type string {
      indexing: summary | index
      # The index-to line below is commented out and therefore inactive.
      # index-to: abstract, titleabstract, default
      stemming: none
    }

    # String field `sourcename`.
    # The value is added to the document summary and indexed; stemming is disabled.
    field sourcename type string {
      indexing: summary | index
      # The index-to line below is commented out and therefore inactive.
      # index-to: sourcename, source
      stemming: none
    }

    # String field `providername`.
    # The value is added to the document summary and indexed; stemming is disabled.
    # The field can also be referred to by the alias `provider`.
    field providername type string {
      indexing: summary | index
      # The index-to line below is commented out and therefore inactive.
      # index-to: providername, source
      stemming: none
      alias: provider
    }

    # String field `thumburl` (presumably a thumbnail URL, going by the name).
    # The original value is stored in the summary first; it is then lowercased
    # and tokenized before being indexed. Stemming is disabled.
    field thumburl type string {
      indexing: summary | lowercase | tokenize | index
      stemming: none
    }

    # Int field `thumbwidth`; the value is added to the document summary and indexed.
    field thumbwidth type int {
      indexing: summary | index
    }

    # Int field `thumbheight`; the value is added to the document summary and indexed.
    field thumbheight type int {
      indexing: summary | index
    }

    # String field `language`.
    # The value is added to the document summary and indexed; stemming is disabled.
    field language type string {
      indexing: summary | index
      # The index-to line below is commented out and therefore inactive.
      # index-to: language, languages
      stemming: none
    }

    # String field `crawldocid`.
    # Summary-only: the indexing statement contains no `index` step.
    field crawldocid type string {
      indexing: summary
      stemming: none
    }

    # Field `url`, declared with the `uri` type.
    # The original value is stored in the summary first; it is then lowercased
    # and tokenized before being indexed. Stemming is disabled.
    field url type uri {
      indexing: summary | lowercase | tokenize | index
      stemming: none
    }

    # Field `sourceurl`, declared with the `uri` type.
    # Uses the same indexing pipeline as `url`: summary, then lowercase and
    # tokenize, then index. Stemming is disabled.
    field sourceurl type uri {
      indexing: summary | lowercase | tokenize | index
      stemming: none
    }

    # String field `categories`.
    # The value is added to the document summary and indexed; stemming is disabled.
    # Two aliases are declared for this field: `category` and `cat`.
    field categories type string {
      indexing: summary | index
      stemming: none
      alias: category
      alias: cat
    }

    # Long field `pubdate`, also reachable through the alias `date`.
    # The value goes to the summary, the index and the attribute `pubdate`, and is
    # finally stored in the indexing variable `tmppubdate`. That variable is read
    # (and, for "typepr" tokens, modified) by the `attributes` field and is read by
    # the eustaticrank/usstaticrank/asiastaticrank expressions.
    # NOTE(review): the "9000 seconds" comment in `attributes` suggests the value is
    # a timestamp in seconds - confirm the unit.
    field pubdate type long {
      indexing: summary | index | attribute pubdate | set_var tmppubdate
      alias: date
    }

    # Long field `expdate`; the value is added to the document summary and indexed.
    field expdate type long {
      indexing: summary | index
    }

    # Int field `fingerprint`; the value is added to the document summary and indexed.
    field fingerprint type int {
      indexing: summary | index 
    }

    # String field `debug`. Its indexing script does three things in order:
    #   1. Sets the variables superduperus, superdupereu and superduperasia to 0.
    #   2. Lowercases the input, stores it in the summary, then normalizes,
    #      tokenizes and indexes it.
    #   3. Splits the lowercased input on ";" and, for every token that matches one
    #      of the three region keywords, sets the matching variable to 10.
    # These variables are read by the eustaticrank, usstaticrank and asiastaticrank
    # fields declared later in this document.
    # indexing-rewrite and stemming are both set to none.
    field debug type string {
      indexing {

        # Initialize variables used for superduper ranking
        0 | set_var superduperus | set_var superdupereu | set_var superduperasia;

        input debug | lowercase | summary | normalize | tokenize | index;
        input debug | lowercase | split ";" | for_each {
          # Loop through each token in debug string
          switch {
            # Each case overwrites the 0 set above with 10 for its region variable.
            case "superduperus": 10 | set_var superduperus;
            case "superdupereu": 10 | set_var superdupereu;
            case "superduperasia": 10 | set_var superduperasia;
          }
        };  
      }
      indexing-rewrite: none
      stemming: none
    }

    # String field `attributes`. Its indexing script does three things in order:
    #   1. Sets the variable superdupermod to 1.
    #   2. Lowercases the input, stores it in the summary, then normalizes,
    #      tokenizes and indexes it.
    #   3. Splits the lowercased input on ";" and adjusts ranking variables for
    #      every "typepr" token.
    # The variables written here (superdupermod, tmpsourcerank, tmppubdate) are read
    # by the eustaticrank, usstaticrank and asiastaticrank fields declared later.
    # NOTE(review): within this file tmpsourcerank is assigned only in the "typepr"
    # case; confirm that reading it while unset is intended.
    # indexing-rewrite and stemming are both set to none.
    field attributes type string {
      indexing {

        # Initialize variables used for superduper ranking
        1 | set_var superdupermod;

        input attributes | lowercase | summary | normalize | tokenize | index;
        input attributes | lowercase | split ";" | for_each {
          # Loop through each token in attributes string
          switch {

            # De-rank PR articles using the following rules:
            #   1. Set tmpsourcerank to 1
            #   2. Subtract 2.5 hours (9000 seconds) from timestamp used in ranking
            #   3. No superduper rank (superdupermod is set to 0)
            case "typepr": 1 | set_var tmpsourcerank | get_var tmppubdate - 9000 | set_var tmppubdate | 0 | set_var superdupermod;
          }
        };
      }
      indexing-rewrite: none
      stemming: none
    }

    # String field `searchcluster`.
    # Summary-only: the indexing statement contains no `index` step.
    field searchcluster type string {
      indexing: summary
      stemming: none
    }

    # Int field `eustaticrank`. The value is not taken from an input field; it is
    # computed from indexing variables:
    #   tmpsourcerank * 4000 + superdupereu * superdupermod * 1000 + tmppubdate * 0.5
    # The result is written to the summary, the index and the attribute `eustaticrank`.
    # The variables are assigned by the `pubdate`, `debug` and `attributes` fields
    # declared earlier in this document.
    # NOTE(review): the expression uses the fractional constant 0.5 while the field
    # is declared int - confirm how the result is converted.
    field eustaticrank type int {
      indexing {
        get_var tmpsourcerank * 4000 + get_var superdupereu * get_var superdupermod * 1000 + get_var tmppubdate * 0.5 | summary | index | attribute eustaticrank;
      }
    }

    # Int field `usstaticrank`. The value is not taken from an input field; it is
    # computed from indexing variables:
    #   tmpsourcerank * 4000 + superduperus * superdupermod * 1000 + tmppubdate * 0.5
    # The result is written to the summary, the index and the attribute `usstaticrank`.
    # The variables are assigned by the `pubdate`, `debug` and `attributes` fields
    # declared earlier in this document.
    # NOTE(review): the expression uses the fractional constant 0.5 while the field
    # is declared int - confirm how the result is converted.
    field usstaticrank type int {
      indexing {
        get_var tmpsourcerank * 4000 + get_var superduperus * get_var superdupermod * 1000 + get_var tmppubdate * 0.5 | summary | index | attribute usstaticrank;
      }
    }

    # Int field `asiastaticrank`. The value is not taken from an input field; it is
    # computed from indexing variables:
    #   tmpsourcerank * 4000 + superduperasia * superdupermod * 1000 + tmppubdate * 0.5
    # The result is written to the summary, the index and the attribute `asiastaticrank`.
    # The variables are assigned by the `pubdate`, `debug` and `attributes` fields
    # declared earlier in this document.
    # NOTE(review): the expression uses the fractional constant 0.5 while the field
    # is declared int - confirm how the result is converted.
    field asiastaticrank type int {
      indexing {
        get_var tmpsourcerank * 4000 + get_var superduperasia * get_var superdupermod * 1000 + get_var tmppubdate * 0.5 | summary | index | attribute asiastaticrank;
      }
    }
  }

  # Rank profile `date`: the body is empty and no parent profile is named.
  rank-profile date {
  }

  # Rank profile `usrank`: inherits from `default` and adds no settings of its own.
  rank-profile usrank inherits default {
  }

  # Rank profile `eurank`: inherits from `default` and adds no settings of its own.
  rank-profile eurank inherits default {
  }

  # Rank profile `asiarank`: inherits from `default` and adds no settings of its own.
  rank-profile asiarank inherits default {
  }

}