about | summary | refs | log | tree | commit | diff | stats
path: root/fsa/src/alltest/ngram_test.cpp
blob: 6cc3e0282d72dac3031a72325134ecfa2c5ed22d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <iostream>

#include <vespa/fsa/permuter.h>
#include <vespa/fsa/selector.h>
#include <vespa/fsa/ngram.h>
#include <vespa/fsa/base64.h>
#include <vespa/fsa/wordchartokenizer.h>

using namespace fsa;

int main(int, char **)
{
  Permuter p;

  NGram q1("a b c d e f"), q2(q1,p,10), q3(q2,p,13);

  Selector s;

  std::string s1("this is a test"), s2;

  Base64::encode(s1,s2);
  std::cout << "'" << s1 << "'" << std::endl;
  std::cout << "'" << s2 << "'" << std::endl;
  Base64::decode(s2,s1);
  std::cout << "'" << s1 << "'" << std::endl;


  std::cout << q1 << std::endl;
  std::cout << q2 << std::endl;
  std::cout << q3 << std::endl;

  q2.sort();
  std::cout << q2 << std::endl;
  q2.reverse();
  std::cout << q2 << std::endl;

  std::cout << std::hex;
  for(unsigned int n=1;n<=6;n++){
    unsigned int c=Permuter::firstComb(n,6);
    while(c>0){
      s.clear();
      s.set(c);
      q2.set(q1,s);
      std::cout << c << ": " << q2 << std::endl;
      c=Permuter::nextComb(c,6);
    }
  }
  std::cout << std::dec;

  WordCharTokenizer tokenizer(WordCharTokenizer::PUNCTUATION_SMART,"PUNCT");

  NGram q4("test, wordchar tokenizer. does it work?",tokenizer);

  std::cout << q4.join(" -|- ") << std::endl;

}