aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java
blob: 3565b5b139f1ecb8a6ed1f8ddb1cbb53e4efedfb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.searchlib.ranking.features.fieldmatch;

/**
 * <p>Information on segment start points stored temporarily during string match metric calculation.</p>
 *
 * <p>Given that we want to start a segment at i, this holds the best known metrics up to i
 * and the end of the previous segment. In addition it holds information on how far we have tried
 * to look for alternative segments from this starting point (skipI and previousJ).</p>
 *
 * @author  bratseth
 */
final class SegmentStartPoint {

    /** The computer which owns this; used to map field indexes to semantic distances and to look up query terms */
    private final FieldMatchMetricsComputer owner;

    /** The i for which this is the possible segment starting points */
    private final int i;

    /** The number of terms, counted from i, which {@link #incrementStartI()} has marked as not present in the field */
    private int skipI;

    /** The best known metrics up to this starting point */
    private FieldMatchMetrics metrics;

    /** The j ending the previous segmentation producing those best metrics */
    private int previousJ;

    /** The semantic distance from the current previousJ which is already explored */
    private int semanticDistanceExplored;

    /**
     * There are possibly more j's to try at this starting point.
     * Kept package-private (as before) since other classes in this package may read it directly.
     */
    boolean open=true;

    /**
     * Creates a segment start point for the first segment
     *
     * @param metrics the best known metrics up to this starting point
     * @param owner the computer owning this starting point
     */
    public SegmentStartPoint(FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) {
        this(0, 0, metrics, owner);
    }

    /**
     * Creates a segment start point for any i position where the j is not known
     *
     * @param i the query position this is a starting point for
     * @param previousJ the j ending the previous segmentation producing the given metrics
     * @param metrics the best known metrics up to this starting point
     * @param owner the computer owning this starting point
     */
    public SegmentStartPoint(int i,int previousJ,FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) {
        this.i=i;
        this.previousJ=previousJ;
        this.metrics=metrics;
        this.owner=owner;
        // skipI and semanticDistanceExplored start out at their default value, 0: nothing is explored yet
    }

    /**
     * Creates a segment start point for any position, where the j of the start point is known
     *
     * @param i the query position this is a starting point for
     * @param j the known j of this start point; everything up to and including its semantic distance
     *          from previousJ is marked as explored
     * @param previousJ the j ending the previous segmentation producing the given metrics
     * @param metrics the best known metrics up to this starting point
     * @param owner the computer owning this starting point
     */
    public SegmentStartPoint(int i,int j,int previousJ,FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) {
        this(i, previousJ, metrics, owner);
        this.semanticDistanceExplored=owner.fieldIndexToSemanticDistance(j,previousJ)+1;
    }

    /** Returns the current best metrics for this starting point */
    public FieldMatchMetrics getMetrics() { return metrics; }

    /**
     * Stores that we have explored to a certain j from the current previousJ.
     * The explored distance is stored exclusively, i.e as the semantic distance of j plus one.
     */
    public void exploredTo(int j) {
        semanticDistanceExplored=owner.fieldIndexToSemanticDistance(j,previousJ)+1;
    }

    /**
     * Offers an alternative history leading up to this point, which is accepted and stored if it is
     * better than the current history, i.e if its segmentation score is strictly higher.
     *
     * <p>Accepting a history does not reset the explored distance or reopen this starting point, even
     * if the previous j changes. Starting over in that case would achieve higher correctness if the match
     * metric is dependent on relative distance between segments, but is more expensive.</p>
     *
     * @param offeredPreviousJ the j ending the offered history
     * @param offeredMetrics the metrics of the offered history
     * @param collectTrace whether to add a line describing the outcome to the trace of the offered metrics
     */
    public void offerHistory(int offeredPreviousJ,FieldMatchMetrics offeredMetrics,boolean collectTrace) {
        boolean accepted = offeredMetrics.getSegmentationScore() > metrics.getSegmentationScore();

        // Trace before updating any state such that "this" is rendered as it was when the offer was made
        if (collectTrace)
            traceOffer(accepted ? "accepted" : "rejected", offeredPreviousJ, offeredMetrics);

        if ( ! accepted) return;

        previousJ=offeredPreviousJ;
        metrics=offeredMetrics;
    }

    /**
     * Adds a line describing the outcome of a history offer to the trace of the offered metrics.
     * The line reports the j ending the <i>offered</i> history; the current previous j is
     * already included through the string form of this.
     */
    private void traceOffer(String outcome, int offeredPreviousJ, FieldMatchMetrics offeredMetrics) {
        offeredMetrics.trace().add("    " + outcome + " offered history [match: " + offeredMetrics.getSegmentationScore() +
                                   " ending at:" + offeredPreviousJ + "] at " + this + "\n");
    }

    /**
     * Returns whether there are possibly still unexplored j's for this i
     */
    public boolean isOpen() { return open; }

    /** Sets whether there are possibly still unexplored j's for this i */
    public void setOpen(boolean open) { this.open=open; }

    /** Returns the i for which this is the possible segment starting points */
    public int getI() { return i; }

    /**
     * Returns the j ending the previous segmentation producing those best metrics,
     */
    public int getPreviousJ() { return previousJ; }

    /**
     * Returns the semantic distance from the previous j which is explored so far, exclusive
     * (meaning, if the value is 0, 0 is <i>not</i> explored yet)
     */
    public int getSemanticDistanceExplored() { return semanticDistanceExplored; }

    /** Sets the semantic distance from the previous j which is explored so far, exclusive */
    public void setSemanticDistanceExplored(int distance) { this.semanticDistanceExplored=distance; }

    /**
     * Returns the position startI we should start at from this start point i.
     * startI==i except when there are i's from this starting point which are not found anywhere in
     * the field. In that case, startI==i+the number of terms following i which are known not to be present
     */
    public int getStartI() {
        return i+skipI;
    }

    /**
     * Increments the startI by one because we have discovered that the term at the current startI is not
     * present in the field
     */
    public void incrementStartI() { skipI++; }

    /**
     * Returns a description of this for tracing: The position (and query term, unless this is the
     * start point following the last query term), the current match, the previous j and whether this is open.
     */
    @Override
    public String toString() {
        String state = open ? "open" : "closed";
        // An i equal to the number of query terms has no term to show: It denotes the end of the query
        if (i==owner.getQuery().getTerms().length)
            return "last segment: Complete match: " + metrics.getMatch() + " previous j: " + previousJ +
                    " (" + state + ")";
        return "segment at " + i + " (" + owner.getQuery().getTerms()[i] + "): Match up to here: " + metrics.getMatch() + " previous j: " +
                previousJ +  " explored to: " + semanticDistanceExplored +
                " (" + state + ")";
    }

}