aboutsummaryrefslogtreecommitdiffstats
path: root/fsa/src/vespa/fsa/detector.cpp
blob: a6fc88ba019c7075b61e262b4d4ca4a516ec717d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * @author  Peter Boros
 * @date    2004/08/20
 * @version $Id$
 * @file    detector.cpp
 * @brief   %FSA (%Finite %State %Automaton) based detector (implementation)
 *
 */

#include <list>
#include <algorithm>
#include <cmath>

#include "detector.h"
#include "fsa.h"
#include "ngram.h"


namespace fsa {

// {{{ Detector::detect

void Detector::detect(const NGram &text, Detector::Hits &hits,
                      unsigned int from, int length) const
{
  std::list<FSA::WordCounterState>            detectors;
  std::list<FSA::WordCounterState>::iterator  det_it;
  unsigned int i,to;

  to = text.length();
  if(length!=-1 && from+length<to)
    to=from+length;

  i=from;
  while(i<to){
    detectors.push_back(FSA::WordCounterState(_dictionary));

    det_it=detectors.begin();
    while(det_it!=detectors.end()){
      det_it->deltaWord(text[i]);
      if(det_it->isFinal()){
        hits.add(text, i-det_it->getCounter()+1, det_it->getCounter(), *det_it);
      }

      if(det_it->isValid())
        ++det_it;
      else{
        det_it=detectors.erase(det_it);
      }
    }
    ++i;
  }

  detectors.clear();
}

// }}}
// {{{ Detector::detectWithHash

void Detector::detectWithHash(const NGram &text, Detector::Hits &hits,
                              unsigned int from, int length) const
{
  std::list<FSA::HashedWordCounterState>            detectors;
  std::list<FSA::HashedWordCounterState>::iterator  det_it;
  unsigned int i,to;

  to = text.length();
  if(length!=-1 && from+length<to)
    to=from+length;

  i=from;
  while(i<to){
    detectors.push_back(FSA::HashedWordCounterState(_dictionary));

    det_it=detectors.begin();
    while(det_it!=detectors.end()){
      det_it->deltaWord(text[i]);
      if(det_it->isFinal()){
        hits.add(text, i-det_it->getCounter()+1, det_it->getCounter(), *det_it);
      }

      if(det_it->isValid())
        ++det_it;
      else{
        det_it=detectors.erase(det_it);
      }
    }
    ++i;
  }

  detectors.clear();
}

// }}}

} // namespace fsa