aboutsummaryrefslogtreecommitdiffstats
path: root/fsa/src/alltest/vectorizer_test.cpp
blob: a5b8e1af03691894d1925312a7856569abcc21f1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * @author  Peter Boros
 * @date    2004/08/20
 * @version $Id$
 * @file    vectorizer_test.cpp
 * @brief   Test for the vectorizer class
 *
 */

#include <iostream>
#include <iomanip>

#include <vespa/fsa/vectorizer.h>

using namespace fsa;

/**
 * @brief Vectorize every line of standard input and print the result.
 *
 * An FSA is constructed from argv[1] when an argument is supplied, and from
 * "__testfsa__.__fsa__" otherwise (presumably a dictionary file name --
 * confirm against the FSA constructor). Each line read from std::cin is
 * loaded into an NGram, passed to Vectorizer::vectorize(), and every item of
 * the resulting term vector is written to std::cout as "<term>, <weight>".
 *
 * @param argc number of command line arguments.
 * @param argv command line arguments; argv[1], if present, is handed to the
 *             FSA constructor.
 * @return always 0.
 */
int main(int argc, char **argv)
{
  FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");

  Vectorizer v(dict);
  Vectorizer::TermVector tv;

  std::string text;
  NGram tokenized_text;

  // Use the result of getline itself as the loop condition. Testing
  // std::cin.eof() before reading would process one extra, empty "line"
  // after the last real one, and would never terminate if the stream failed
  // without reaching end-of-file.
  while(std::getline(std::cin,text)){
    tokenized_text.set(text);
    v.vectorize(tokenized_text,tv);

    for(unsigned int i=0; i<tv.size(); i++){
      std::cout << tv[i].term() << ", " << tv[i].weight() << '\n';
    }
    // One flush per input line keeps interactive use responsive without
    // flushing after every single term.
    std::cout << std::flush;
  }

  return 0;
}