summaryrefslogtreecommitdiffstats
path: root/fsa/src/alltest
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fsa/src/alltest
Publish
Diffstat (limited to 'fsa/src/alltest')
-rw-r--r--fsa/src/alltest/.gitignore15
-rw-r--r--fsa/src/alltest/CMakeLists.txt70
-rwxr-xr-xfsa/src/alltest/alltest.sh11
-rw-r--r--fsa/src/alltest/conceptnet_test.cpp80
-rw-r--r--fsa/src/alltest/conceptnet_test.out4
-rw-r--r--fsa/src/alltest/detector_test.cpp50
-rw-r--r--fsa/src/alltest/detector_test.out26
-rwxr-xr-xfsa/src/alltest/detector_test.sh3
-rw-r--r--fsa/src/alltest/fsa_create_test.cpp94
-rw-r--r--fsa/src/alltest/fsa_perftest.cpp77
-rw-r--r--fsa/src/alltest/fsa_test.cpp114
-rw-r--r--fsa/src/alltest/fsa_test.out60
-rwxr-xr-xfsa/src/alltest/fsa_test.sh3
-rw-r--r--fsa/src/alltest/fsamanager_test.cpp25
-rw-r--r--fsa/src/alltest/lookup_test.cpp49
-rw-r--r--fsa/src/alltest/lookup_test.out41
-rwxr-xr-xfsa/src/alltest/lookup_test.sh3
-rw-r--r--fsa/src/alltest/ngram_test.cpp57
-rw-r--r--fsa/src/alltest/ngram_test.out72
-rwxr-xr-xfsa/src/alltest/ngram_test.sh3
-rw-r--r--fsa/src/alltest/segmenter_test.cpp74
-rw-r--r--fsa/src/alltest/segmenter_test.out332
-rwxr-xr-xfsa/src/alltest/segmenter_test.sh3
-rw-r--r--fsa/src/alltest/testinput.txt41
-rw-r--r--fsa/src/alltest/vectorizer_perftest.cpp95
-rw-r--r--fsa/src/alltest/vectorizer_test.cpp40
-rw-r--r--fsa/src/alltest/vectorizer_test.out26
-rwxr-xr-xfsa/src/alltest/vectorizer_test.sh3
28 files changed, 1471 insertions, 0 deletions
diff --git a/fsa/src/alltest/.gitignore b/fsa/src/alltest/.gitignore
new file mode 100644
index 00000000000..c950caba857
--- /dev/null
+++ b/fsa/src/alltest/.gitignore
@@ -0,0 +1,15 @@
+Makefile
+.depend
+__testfsa__.__fsa__
+fsa_conceptnet_test_app
+fsa_detector_test_app
+fsa_fsa_create_test_app
+fsa_fsa_perf_test_app
+fsa_fsa_test_app
+fsa_fsamanager_test_app
+fsa_lookup_test_app
+fsa_ngram_test_app
+fsa_segmenter_test_app
+fsa_vectorizer_perf_test_app
+fsa_vectorizer_test_app
+*.output
diff --git a/fsa/src/alltest/CMakeLists.txt b/fsa/src/alltest/CMakeLists.txt
new file mode 100644
index 00000000000..d82ca400405
--- /dev/null
+++ b/fsa/src/alltest/CMakeLists.txt
@@ -0,0 +1,70 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# One executable per test source in this directory. The two targets that
+# include fsamanagers headers (conceptnet, fsamanager) additionally depend
+# on the fsamanagers library.
+vespa_add_executable(fsa_conceptnet_test_app
+ SOURCES
+ conceptnet_test.cpp
+ DEPENDS
+ fsamanagers
+ fsa
+)
+vespa_add_executable(fsa_detector_test_app
+ SOURCES
+ detector_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsa_test_app
+ SOURCES
+ fsa_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsa_create_test_app
+ SOURCES
+ fsa_create_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsa_perf_test_app
+ SOURCES
+ fsa_perftest.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsamanager_test_app
+ SOURCES
+ fsamanager_test.cpp
+ DEPENDS
+ fsamanagers
+ fsa
+)
+vespa_add_executable(fsa_lookup_test_app
+ SOURCES
+ lookup_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_ngram_test_app
+ SOURCES
+ ngram_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_segmenter_test_app
+ SOURCES
+ segmenter_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_vectorizer_test_app
+ SOURCES
+ vectorizer_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_vectorizer_perf_test_app
+ SOURCES
+ vectorizer_perftest.cpp
+ DEPENDS
+ fsa
+)
+# A single registered test drives the suite through alltest.sh. Note that
+# the test NAME is the perf-test executable's name although the command
+# runs the whole script, and that the script is started with `sh`, so its
+# `#!/bin/bash` line is not what selects the interpreter.
+vespa_add_test(NAME fsa_vectorizer_perf_test_app NO_VALGRIND COMMAND sh alltest.sh)
diff --git a/fsa/src/alltest/alltest.sh b/fsa/src/alltest/alltest.sh
new file mode 100755
index 00000000000..37274721e25
--- /dev/null
+++ b/fsa/src/alltest/alltest.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+./detector_test.sh
+./fsa_test.sh
+./fsa_fsa_create_test_app
+./fsa_fsa_perf_test_app
+./fsa_fsamanager_test_app . __testfsa__.__fsa__
+./lookup_test.sh
+./ngram_test.sh
+./segmenter_test.sh
+./vectorizer_test.sh
+./fsa_vectorizer_perf_test_app
diff --git a/fsa/src/alltest/conceptnet_test.cpp b/fsa/src/alltest/conceptnet_test.cpp
new file mode 100644
index 00000000000..38c020aa511
--- /dev/null
+++ b/fsa/src/alltest/conceptnet_test.cpp
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <vespa/fsa/conceptnet.h>
+#include <vespa/fsamanagers/conceptnetmanager.h>
+
+using namespace fsa;
+
+/**
+ * Command-line driver that looks up units in a concept net.
+ *
+ * Usage: conceptnet_test [-aec] DOMAIN [UNIT ...]
+ *   -a  also print the associations of each unit
+ *   -e  also print the extensions of each unit
+ *   -c  also print the categories of each unit
+ *
+ * The concept net is loaded from DOMAIN.fsa and DOMAIN.dat. Exits with
+ * status 1 on bad usage or when the concept net cannot be loaded or
+ * retrieved.
+ */
+int main(int argc, char **argv)
+{
+  // getopt() returns an int. Keeping the result in a plain char makes the
+  // comparison with -1 always false on targets where char is unsigned, so
+  // the option loop would never terminate there.
+  int opt;
+  extern int optind;
+
+  bool do_ext = false, do_assoc = false, do_cat = false;
+
+  while((opt=getopt(argc,argv,"aec")) != -1){
+    switch(opt){
+    case 'a':
+      do_assoc = true;
+      break;
+    case 'e':
+      do_ext = true;
+      break;
+    case 'c':
+      do_cat = true;
+      break;
+    case '?':
+      fprintf(stderr,"conceptnet_test: unrecognized option\n");
+      exit(1);
+    }
+  }
+
+  // The first non-option argument is mandatory: the domain name.
+  if(optind>=argc){
+    fprintf(stderr,"usage: conceptnet_test [-aec] DOMAIN [UNIT ...]\n");
+    exit(1);
+  }
+
+  std::string domain = argv[optind];
+
+  if(!ConceptNetManager::instance().load(domain,
+                                         domain + ".fsa",
+                                         domain + ".dat")){
+    fprintf(stderr,"failed to load concept net %s\n",domain.c_str());
+    exit(1);
+  }
+
+  // NOTE(review): ownership of the returned handle is not visible from this
+  // file; confirm whether the caller is expected to delete it.
+  ConceptNet::Handle* cn = ConceptNetManager::instance().get(domain);
+
+  if(cn==NULL){
+    fprintf(stderr,"failed to load concept net %s\n",domain.c_str());
+    exit(1);
+  }
+
+  // Every remaining argument is a unit to look up and report.
+  for(int i=optind+1;i<argc;i++){
+    int idx = (*cn)->lookup(argv[i]);
+    printf("%s(%d) : (%d,%d,%d,%d) (%f,%f)\n",argv[i],idx,
+           (*cn)->frq(idx),(*cn)->cFrq(idx),(*cn)->qFrq(idx),(*cn)->sFrq(idx),
+           (*cn)->score(idx),(*cn)->strength(idx));
+    if(do_ext){
+      for(int e = 0; e<(*cn)->numExt(idx); e++){
+        printf(" %s, %d\n",(*cn)->lookup((*cn)->ext(idx,e)),(*cn)->extFrq(idx,e));
+      }
+    }
+    if(do_assoc){
+      for(int a = 0; a<(*cn)->numAssoc(idx); a++){
+        printf(" %s, %d\n",(*cn)->lookup((*cn)->assoc(idx,a)),(*cn)->assocFrq(idx,a));
+      }
+    }
+    if(do_cat){
+      for(int c = 0; c<(*cn)->numCat(idx); c++){
+        printf(" %s\n",(*cn)->catName((*cn)->cat(idx,c)));
+      }
+    }
+  }
+
+  return 0;
+}
diff --git a/fsa/src/alltest/conceptnet_test.out b/fsa/src/alltest/conceptnet_test.out
new file mode 100644
index 00000000000..9f3570cebf1
--- /dev/null
+++ b/fsa/src/alltest/conceptnet_test.out
@@ -0,0 +1,4 @@
+new york(841954) : (-1,-1,-1,-1) (-1.000000,-1.000000)
+sunnyvale(1139231) : (-1,-1,-1,-1) (-1.000000,-1.000000)
+gibson(479780) : (-1,-1,-1,-1) (-1.000000,-1.000000)
+metallica(770993) : (-1,-1,-1,-1) (-1.000000,-1.000000)
diff --git a/fsa/src/alltest/detector_test.cpp b/fsa/src/alltest/detector_test.cpp
new file mode 100644
index 00000000000..1942c4ba7a6
--- /dev/null
+++ b/fsa/src/alltest/detector_test.cpp
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file detector_test.cpp
+ * @brief Test for the Detector class
+ *
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/detector.h>
+#include <vespa/fsa/ngram.h>
+
+using namespace fsa;
+
+/**
+ * Hit collector for the detector test: every reported hit is written to
+ * standard output as the first and last term index plus the matched terms
+ * joined by a single space.
+ */
+class MyHits : public Detector::Hits
+{
+public:
+  MyHits() {}
+  ~MyHits() {}
+
+  /**
+   * Prints one detected phrase.
+   *
+   * @param text   Tokenized input the hit was found in.
+   * @param from   Index of the first term of the hit.
+   * @param length Number of terms in the hit.
+   * The final (state) argument is not used.
+   */
+  void add(const NGram &text,
+           unsigned int from, int length,
+           const FSA::State &)
+  {
+    std::cout << "detected: [" << from << ",";
+    std::cout << from+length-1 << "], '";
+    std::cout << text.join(" ",from,length) << "'\n";
+  }
+};
+
+/**
+ * Detector test driver: reads lines from standard input and reports every
+ * dictionary phrase detected in each line through MyHits.
+ *
+ * @param argv argv[1], when given, is the dictionary file to load;
+ *             otherwise "__testfsa__.__fsa__" is used.
+ */
+int main(int argc, char **argv)
+{
+  FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");
+
+  Detector d(dict);
+  MyHits h;
+
+  std::string text;
+  // Loop on the result of getline() rather than on eof(): testing eof()
+  // before reading runs one extra pass on an empty string after the last
+  // line, and never terminates if the stream fails without reaching EOF.
+  while(std::getline(std::cin,text)){
+    d.detect(text,h);
+  }
+
+  return 0;
+}
diff --git a/fsa/src/alltest/detector_test.out b/fsa/src/alltest/detector_test.out
new file mode 100644
index 00000000000..c5dbbdd08f1
--- /dev/null
+++ b/fsa/src/alltest/detector_test.out
@@ -0,0 +1,26 @@
+detected: [0,0], 'apple'
+detected: [0,0], 'apricot'
+detected: [0,0], 'artichoke'
+detected: [0,0], 'banana'
+detected: [0,0], 'cabbage'
+detected: [0,0], 'carrot'
+detected: [0,0], 'cherry'
+detected: [0,0], 'chili'
+detected: [0,0], 'cucumber'
+detected: [0,0], 'eggplant'
+detected: [0,0], 'grapes'
+detected: [0,0], 'lettuce'
+detected: [0,0], 'onion'
+detected: [0,0], 'paprika'
+detected: [0,1], 'passion fruit'
+detected: [0,0], 'pea'
+detected: [0,0], 'peach'
+detected: [0,0], 'pear'
+detected: [0,0], 'pineapple'
+detected: [0,0], 'plum'
+detected: [0,0], 'potato'
+detected: [0,0], 'pumpkin'
+detected: [0,1], 'sour cherry'
+detected: [1,1], 'cherry'
+detected: [0,0], 'squash'
+detected: [0,0], 'tomato'
diff --git a/fsa/src/alltest/detector_test.sh b/fsa/src/alltest/detector_test.sh
new file mode 100755
index 00000000000..dd6f650a35c
--- /dev/null
+++ b/fsa/src/alltest/detector_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Runs the detector test on testinput.txt and compares its output with the
+# expected output. Stops at the first failing command so that a crash of
+# the test binary is not hidden by the diff step.
+set -e
+./fsa_detector_test_app < testinput.txt > detector_test.output
+diff detector_test.output detector_test.out
diff --git a/fsa/src/alltest/fsa_create_test.cpp b/fsa/src/alltest/fsa_create_test.cpp
new file mode 100644
index 00000000000..c72ea900aad
--- /dev/null
+++ b/fsa/src/alltest/fsa_create_test.cpp
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/automaton.h>
+#include <vespa/fsa/timestamp.h>
+
+using namespace fsa;
+
+/**
+ * Builds the small test automaton (cities, fruits and vegetables), adds a
+ * perfect hash, writes it to "__testfsa__.__fsa__", retrieves it as an FSA
+ * and looks up one key in it. Build and retrieval timings are printed.
+ *
+ * @return 0 when the FSA could be retrieved from the automaton, 1 otherwise.
+ */
+int main(int, char**)
+{
+
+  Automaton *aut = new Automaton;
+
+  Blob fruit("Fruit"), veggie("Vegetable"), city("City");
+
+  TimeStamp t;
+
+  aut->init();
+
+  // The keys are listed in the order they are handed to insertSortedString().
+  aut->insertSortedString("Cupertino",city);
+  aut->insertSortedString("Foster City",city);
+  aut->insertSortedString("Los Altos",city);
+  aut->insertSortedString("Menlo Park",city);
+  aut->insertSortedString("Mountain View",city);
+  aut->insertSortedString("Palo Alto",city);
+  aut->insertSortedString("San Francisco",city);
+  aut->insertSortedString("San Jose",city);
+  aut->insertSortedString("Santa Clara",city);
+  aut->insertSortedString("Saratoga",city);
+  aut->insertSortedString("Sunnyvale",city);
+  aut->insertSortedString("apple",fruit);
+  aut->insertSortedString("apricot",fruit);
+  aut->insertSortedString("artichoke",veggie);
+  aut->insertSortedString("banana",fruit);
+  aut->insertSortedString("cabbage",veggie);
+  aut->insertSortedString("carrot",veggie);
+  aut->insertSortedString("cherry",fruit);
+  aut->insertSortedString("chili",veggie);
+  aut->insertSortedString("cucumber",veggie);
+  aut->insertSortedString("eggplant",veggie);
+  aut->insertSortedString("grapes",fruit);
+  aut->insertSortedString("lettuce",veggie);
+  aut->insertSortedString("onion",veggie);
+  aut->insertSortedString("paprika",veggie);
+  aut->insertSortedString("passion fruit",fruit);
+  aut->insertSortedString("pea",veggie);
+  aut->insertSortedString("peach",fruit);
+  aut->insertSortedString("pear",fruit);
+  aut->insertSortedString("pineapple",fruit);
+  aut->insertSortedString("plum",fruit);
+  aut->insertSortedString("potato",veggie);
+  aut->insertSortedString("pumpkin",veggie);
+  aut->insertSortedString("sour cherry",fruit);
+  aut->insertSortedString("squash",veggie);
+  aut->insertSortedString("tomato",veggie);
+
+  aut->finalize();
+
+  double d1 = t.elapsed();
+
+  aut->addPerfectHash();
+
+  double d2 = t.elapsed();
+
+  // This file is the fixture the other tests in this directory open.
+  aut->write("__testfsa__.__fsa__");
+
+  double d3 = t.elapsed();
+
+  FSA *fsa = aut->getFSA();
+
+  double d4 = t.elapsed();
+
+  std::cout << "Automaton build finished (" << 1000*d1 << "ms," << 1000*(d2-d1) << "ms)"
+            << ", fsa retrieval (" << 1000*(d4-d3) << "ms) " << ((fsa==NULL)?"failed":"succeeded") << ".\n";
+
+  // Report a failed retrieval through the exit status so that a script
+  // running this program can notice it.
+  const int status = (fsa==NULL) ? 1 : 0;
+
+  if(fsa!=NULL){
+    FSA::State fs(*fsa);
+    const unsigned char *pb = fs.lookup("cucumber");
+    std::cout << "Lookup(\"cucumber\") -> ";
+    if(pb!=NULL){
+      std::cout << "\"" << pb << "\"";
+    }
+    else{
+      std::cout << "not found.";
+    }
+    std::cout << "\n";
+  }
+
+  delete aut;
+  delete fsa;
+
+  return status;
+}
diff --git a/fsa/src/alltest/fsa_perftest.cpp b/fsa/src/alltest/fsa_perftest.cpp
new file mode 100644
index 00000000000..90d2c042b07
--- /dev/null
+++ b/fsa/src/alltest/fsa_perftest.cpp
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <stdlib.h>
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/timestamp.h>
+
+using namespace fsa;
+
+/**
+ * Runs `count` rounds of start() followed by lookup(input) on `state` and
+ * prints one result line: the label, the elapsed time in milliseconds and
+ * the number of processed input characters per second.
+ *
+ * @param label Text printed in front of the timing (including separator).
+ * @param state State object to exercise; must provide start() and lookup().
+ * @param input String looked up in every round.
+ * @param count Number of rounds.
+ * @param t     Running timer the elapsed time is read from.
+ */
+template <class StateT>
+static void timeLookups(const char *label, StateT &state,
+                        const std::string &input, unsigned int count,
+                        TimeStamp &t)
+{
+  const double t0 = t.elapsed();
+  for(unsigned int i=0;i<count;i++){
+    state.start();
+    state.lookup(input);
+  }
+  const double t1 = t.elapsed()-t0;
+  std::cout << label << t1*1000 << " ms" << "\t"
+            << (unsigned int)(count*input.length()/t1) << " delta/sec" << std::endl;
+}
+
+/**
+ * Lookup micro-benchmark: times the same lookup on each of the FSA state
+ * classes over the automaton stored in "__testfsa__.__fsa__".
+ */
+int main(int, char**)
+{
+  FSA f("__testfsa__.__fsa__");
+  FSA::State s(f);
+  FSA::HashedState hs(f);
+  FSA::MemoryState ms(f);
+  FSA::HashedMemoryState hms(f);
+  FSA::CounterState cs(f);
+  std::string input("cucumber");
+  unsigned int count=10000000;
+
+  std::cout << "Number of lookups: " << count << std::endl;
+  std::cout << "Input string length: " << input.length() << std::endl;
+  std::cout << std::endl;
+
+  TimeStamp t;
+
+  // One timed section per state class, in the same order as before.
+  timeLookups("State: ",s,input,count,t);
+  timeLookups("HashedState: ",hs,input,count,t);
+  timeLookups("MemoryState: ",ms,input,count,t);
+  timeLookups("HashedMemoryState: ",hms,input,count,t);
+  timeLookups("CounterState: ",cs,input,count,t);
+
+  return 0;
+}
diff --git a/fsa/src/alltest/fsa_test.cpp b/fsa/src/alltest/fsa_test.cpp
new file mode 100644
index 00000000000..5bc95f20430
--- /dev/null
+++ b/fsa/src/alltest/fsa_test.cpp
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+
+using namespace fsa;
+
+/**
+ * Functional test of the FSA state classes on "__testfsa__.__fsa__".
+ * Everything is printed to standard output; fsa_test.sh compares that
+ * output with fsa_test.out, so the exact text of every line matters.
+ */
+int main(int, char**)
+{
+ // Open the automaton file using the FILE_ACCESS_MMAP access mode.
+ FSA *f = new FSA("__testfsa__.__fsa__", FILE_ACCESS_MMAP);
+ FSA::State *fs = new FSA::State(*f);
+
+ // start/delta test: feed "cucumber" in three pieces (a std::string, a
+ // single character and a C string) and expect to end in a final state.
+ std::string s("cucu");
+ fs->start(s);
+ fs->delta('m');
+ fs->delta("ber");
+ if(fs->isFinal()){
+ printf("start/delta test: string(\"cucu\")+'m'+\"ber\" is accepted\n");
+ printf(" data size: %d\n",fs->dataSize());
+ printf(" data string: \"%-*.*s\"\n",fs->dataSize(),fs->dataSize(),fs->data());
+ }
+ else {
+ printf("start/delta test failed.\n");
+ }
+
+ // lookup test: a single call that yields the data pointer or NULL.
+ const unsigned char *pb = fs->lookup("cucumber");
+ if(pb!=NULL){
+ printf("lookup test: \"cucumber\" -> \"%s\"\n",pb);
+ }
+ else{
+ printf("lookup test: \"cucumber\" not found.\n");
+ }
+
+
+ FSA::HashedState *fs1 = new FSA::HashedState(*f);
+
+
+ fs1->delta("pe");
+
+ // Copy the state after "pe" twice; the three states then continue with
+ // different suffixes ("a", "ach", "ar").
+ FSA::HashedState *fs2 = new FSA::HashedState(*fs1);
+ FSA::HashedState *fs3 = new FSA::HashedState(*fs1);
+
+
+
+ fs1->delta("a");
+ fs2->delta("ach");
+ fs3->delta("ar");
+
+ // Only fs1 and fs2 are checked for finality here; fs3 is printed as well.
+ if(fs1->isFinal() && fs2->isFinal()){
+ printf("copy hashed state test:\n");
+ printf(" \"pe\"+\"a\": hash=%d, data_size=%d, data string=\"%-*.*s\"\n",
+ fs1->hash(),fs1->dataSize(),fs1->dataSize(),fs1->dataSize(),fs1->data());
+ printf(" \"pe\"+\"ach\": hash=%d, data_size=%d, data string=\"%-*.*s\"\n",
+ fs2->hash(),fs2->dataSize(),fs2->dataSize(),fs2->dataSize(),fs2->data());
+ printf(" \"pe\"+\"ar\": hash=%d, data_size=%d, data string=\"%-*.*s\"\n",
+ fs3->hash(),fs3->dataSize(),fs3->dataSize(),fs3->dataSize(),fs3->data());
+
+ }
+ else {
+ printf("copy hashed state test failed.\n");
+ }
+
+ // revLookup test: map each index back to a string, look that string up
+ // again and print the resulting hash. Stops at the first empty result or
+ // after 100 indices.
+ printf("revLookup test:\n");
+ unsigned int i=0;
+ std::string res;
+ while(i<100){
+ res=fs2->revLookup(i);
+ if(res.size()==0)
+ break;
+ fs2->lookup(res);
+ printf(" %d -> %s -> %d\n",i,res.c_str(),fs2->hash());
+ i++;
+ }
+
+ // iterator test: list what the iterator yields from the state after 'p'.
+ printf("iterator test:\n");
+ fs1->start('p');
+ printf(" possible continuations from \"p\":\n");
+ for(FSA::iterator it(*fs1); it!=fs1->end(); ++it){
+ printf(" \"p\" + \"%s\"\n",it->str().c_str());
+ }
+
+ delete fs;
+ delete fs1;
+ delete fs2;
+ delete fs3;
+
+
+ // counter/memory state test: start both states on the same input and
+ // print the MemoryState's memory() and the CounterState's counter().
+ printf("counter/memory state test\n");
+ FSA::CounterState *cs = new FSA::CounterState(*f);
+ FSA::MemoryState *ms = new FSA::MemoryState(*f);
+
+ cs->start("cucu");
+ ms->start("cucu");
+ printf(" \"cucu\" -> %s:%d\n",ms->memory().c_str(),cs->counter());
+
+ cs->start("cucumber");
+ ms->start("cucumber");
+ printf(" \"cucumber\" -> %s:%d\n",ms->memory().c_str(),cs->counter());
+
+ // Input that continues past a dictionary entry.
+ cs->start("cucumber slumber");
+ ms->start("cucumber slumber");
+ printf(" \"cucumber slumber\" -> %s:%d\n",ms->memory().c_str(),cs->counter());
+
+ // The states are deleted before the FSA they were created from.
+ delete cs;
+ delete ms;
+ delete f;
+
+ return 0;
+}
diff --git a/fsa/src/alltest/fsa_test.out b/fsa/src/alltest/fsa_test.out
new file mode 100644
index 00000000000..b9c96e5b795
--- /dev/null
+++ b/fsa/src/alltest/fsa_test.out
@@ -0,0 +1,60 @@
+start/delta test: string("cucu")+'m'+"ber" is accepted
+ data size: 10
+ data string: "Vegetable "
+lookup test: "cucumber" -> "Vegetable"
+copy hashed state test:
+ "pe"+"a": hash=26, data_size=10, data string="Vegetable "
+ "pe"+"ach": hash=27, data_size=6, data string="Fruit "
+ "pe"+"ar": hash=28, data_size=6, data string="Fruit "
+revLookup test:
+ 0 -> Cupertino -> 0
+ 1 -> Foster City -> 1
+ 2 -> Los Altos -> 2
+ 3 -> Menlo Park -> 3
+ 4 -> Mountain View -> 4
+ 5 -> Palo Alto -> 5
+ 6 -> San Francisco -> 6
+ 7 -> San Jose -> 7
+ 8 -> Santa Clara -> 8
+ 9 -> Saratoga -> 9
+ 10 -> Sunnyvale -> 10
+ 11 -> apple -> 11
+ 12 -> apricot -> 12
+ 13 -> artichoke -> 13
+ 14 -> banana -> 14
+ 15 -> cabbage -> 15
+ 16 -> carrot -> 16
+ 17 -> cherry -> 17
+ 18 -> chili -> 18
+ 19 -> cucumber -> 19
+ 20 -> eggplant -> 20
+ 21 -> grapes -> 21
+ 22 -> lettuce -> 22
+ 23 -> onion -> 23
+ 24 -> paprika -> 24
+ 25 -> passion fruit -> 25
+ 26 -> pea -> 26
+ 27 -> peach -> 27
+ 28 -> pear -> 28
+ 29 -> pineapple -> 29
+ 30 -> plum -> 30
+ 31 -> potato -> 31
+ 32 -> pumpkin -> 32
+ 33 -> sour cherry -> 33
+ 34 -> squash -> 34
+ 35 -> tomato -> 35
+iterator test:
+ possible continuations from "p":
+ "p" + "aprika"
+ "p" + "assion fruit"
+ "p" + "ea"
+ "p" + "each"
+ "p" + "ear"
+ "p" + "ineapple"
+ "p" + "lum"
+ "p" + "otato"
+ "p" + "umpkin"
+counter/memory state test
+ "cucu" -> cucu:4
+ "cucumber" -> cucumber:8
+ "cucumber slumber" -> cucumber:8
diff --git a/fsa/src/alltest/fsa_test.sh b/fsa/src/alltest/fsa_test.sh
new file mode 100755
index 00000000000..497fd291c4d
--- /dev/null
+++ b/fsa/src/alltest/fsa_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Runs the fsa test and compares its output with the expected output.
+# Stops at the first failing command so that a crash of the test binary is
+# not hidden by the diff step.
+set -e
+./fsa_fsa_test_app > fsa_test.output
+diff fsa_test.output fsa_test.out
diff --git a/fsa/src/alltest/fsamanager_test.cpp b/fsa/src/alltest/fsamanager_test.cpp
new file mode 100644
index 00000000000..7ca4a2d8e8a
--- /dev/null
+++ b/fsa/src/alltest/fsamanager_test.cpp
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsamanagers/fsamanager.h>
+
+#include <iostream>
+#include <string>
+#include <stdlib.h>
+
+using namespace fsa;
+
+/**
+ * FSAManager test driver.
+ *
+ * Usage: fsamanager_test cache_dir fsa_file_or_url [fsa_file_or_url ...]
+ *
+ * Sets the manager's cache directory and loads every listed automaton,
+ * using the argument itself as the id it is registered under. Progress is
+ * reported on standard error.
+ *
+ * @return 0 when every load succeeded, 1 on bad usage or if any load failed.
+ */
+int main(int argc, char** argv)
+{
+  if(argc<3){
+    std::cerr << "usage: fsamanager_test cache_dir fsa_file_or_url [fsa_file_or_url ...]\n";
+    return 1;
+  }
+
+  FSAManager::instance().setCacheDir(argv[1]);
+
+  // Remember failures so the exit status reflects them; otherwise a script
+  // running this program could never see a failed load.
+  bool allLoaded = true;
+  for(int i=2;i<argc;i++){
+    std::cerr << "Loading " << argv[i] << " ... ";
+    const bool ok = FSAManager::instance().load(argv[i],argv[i]);
+    std::cerr << (ok ? "ok":"failed") << "\n";
+    allLoaded = allLoaded && ok;
+  }
+
+  return allLoaded ? 0 : 1;
+}
diff --git a/fsa/src/alltest/lookup_test.cpp b/fsa/src/alltest/lookup_test.cpp
new file mode 100644
index 00000000000..6ff4e3063d4
--- /dev/null
+++ b/fsa/src/alltest/lookup_test.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <stdlib.h>
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+
+// Compatibility shim for g++ older than 3.1: defines a dummy std::left (an
+// empty string) so that the `std::left` used in main() below still compiles
+// and prints nothing. Presumably those library versions do not provide the
+// manipulator -- TODO confirm.
+#if (__GNUG__ <3 || (__GNUG__ == 3 && __GNUC_MINOR__ < 1))
+namespace std {
+const char *left = "";
+}
+#endif
+
+using namespace fsa;
+
+/**
+ * Lookup test driver.
+ *
+ * Usage: lookup_test fsafile <input >output
+ *
+ * Reads one candidate string per line from standard input and reports
+ * whether the automaton accepts it; for accepted strings the hash value,
+ * data size and data string are printed as well. Empty lines are skipped.
+ */
+int main(int argc, char** argv)
+{
+
+  if(argc!=2){
+    std::cerr << "usage: lookup_test fsafile <input >output" << std::endl;
+    exit(1);
+  }
+
+  FSA f(argv[1]);
+  FSA::HashedState fs(f);
+  std::string input;
+
+  // Loop on the result of getline() rather than on eof(): a stream that
+  // fails without reaching EOF would otherwise keep this loop spinning.
+  while(std::getline(std::cin,input)){
+    if(input.empty()){
+      continue;
+    }
+
+    fs.start(input);
+    if(fs.isFinal()){
+      // The data string is left-aligned in a field as wide as the data size.
+      std::cout << "'" << input << "'" << " is accepted, hash value: " << fs.hash()
+                << ", data size: " << fs.dataSize()
+                << ", data string: \""
+                << std::setw(fs.dataSize()) << std::left << fs.data()
+                << "\"" << std::endl;
+    }
+    else{
+      std::cout << "'" << input << "'" << " is not accepted." << std::endl;
+    }
+  }
+
+  return 0;
+}
diff --git a/fsa/src/alltest/lookup_test.out b/fsa/src/alltest/lookup_test.out
new file mode 100644
index 00000000000..b7dd9b4da4b
--- /dev/null
+++ b/fsa/src/alltest/lookup_test.out
@@ -0,0 +1,41 @@
+'Cupertino' is accepted, hash value: 0, data size: 5, data string: "City "
+'Foster City' is accepted, hash value: 1, data size: 5, data string: "City "
+'Los Altos' is accepted, hash value: 2, data size: 5, data string: "City "
+'Menlo Park' is accepted, hash value: 3, data size: 5, data string: "City "
+'Mountain View' is accepted, hash value: 4, data size: 5, data string: "City "
+'Palo Alto' is accepted, hash value: 5, data size: 5, data string: "City "
+'San Francisco' is accepted, hash value: 6, data size: 5, data string: "City "
+'San Jose' is accepted, hash value: 7, data size: 5, data string: "City "
+'Santa Clara' is accepted, hash value: 8, data size: 5, data string: "City "
+'Saratoga' is accepted, hash value: 9, data size: 5, data string: "City "
+'Sunnyvale' is accepted, hash value: 10, data size: 5, data string: "City "
+'apple' is accepted, hash value: 11, data size: 6, data string: "Fruit "
+'apricot' is accepted, hash value: 12, data size: 6, data string: "Fruit "
+'artichoke' is accepted, hash value: 13, data size: 10, data string: "Vegetable "
+'banana' is accepted, hash value: 14, data size: 6, data string: "Fruit "
+'cabbage' is accepted, hash value: 15, data size: 10, data string: "Vegetable "
+'carrot' is accepted, hash value: 16, data size: 10, data string: "Vegetable "
+'cherry' is accepted, hash value: 17, data size: 6, data string: "Fruit "
+'chili' is accepted, hash value: 18, data size: 10, data string: "Vegetable "
+'cucumber' is accepted, hash value: 19, data size: 10, data string: "Vegetable "
+'eggplant' is accepted, hash value: 20, data size: 10, data string: "Vegetable "
+'grapes' is accepted, hash value: 21, data size: 6, data string: "Fruit "
+'lettuce' is accepted, hash value: 22, data size: 10, data string: "Vegetable "
+'onion' is accepted, hash value: 23, data size: 10, data string: "Vegetable "
+'paprika' is accepted, hash value: 24, data size: 10, data string: "Vegetable "
+'passion fruit' is accepted, hash value: 25, data size: 6, data string: "Fruit "
+'pea' is accepted, hash value: 26, data size: 10, data string: "Vegetable "
+'peach' is accepted, hash value: 27, data size: 6, data string: "Fruit "
+'pear' is accepted, hash value: 28, data size: 6, data string: "Fruit "
+'pineapple' is accepted, hash value: 29, data size: 6, data string: "Fruit "
+'plum' is accepted, hash value: 30, data size: 6, data string: "Fruit "
+'potato' is accepted, hash value: 31, data size: 10, data string: "Vegetable "
+'pumpkin' is accepted, hash value: 32, data size: 10, data string: "Vegetable "
+'sour cherry' is accepted, hash value: 33, data size: 6, data string: "Fruit "
+'squash' is accepted, hash value: 34, data size: 10, data string: "Vegetable "
+'tomato' is accepted, hash value: 35, data size: 10, data string: "Vegetable "
+'alpha' is not accepted.
+'beta' is not accepted.
+'gamma' is not accepted.
+'delta' is not accepted.
+'epsilon' is not accepted.
diff --git a/fsa/src/alltest/lookup_test.sh b/fsa/src/alltest/lookup_test.sh
new file mode 100755
index 00000000000..394baecc78a
--- /dev/null
+++ b/fsa/src/alltest/lookup_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Runs the lookup test on testinput.txt against the test automaton and
+# compares its output with the expected output. Stops at the first failing
+# command so that a crash of the test binary is not hidden by the diff step.
+set -e
+./fsa_lookup_test_app __testfsa__.__fsa__ < testinput.txt > lookup_test.output
+diff lookup_test.output lookup_test.out
diff --git a/fsa/src/alltest/ngram_test.cpp b/fsa/src/alltest/ngram_test.cpp
new file mode 100644
index 00000000000..7f0be7769e1
--- /dev/null
+++ b/fsa/src/alltest/ngram_test.cpp
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+
+#include <vespa/fsa/permuter.h>
+#include <vespa/fsa/selector.h>
+#include <vespa/fsa/ngram.h>
+#include <vespa/fsa/base64.h>
+#include <vespa/fsa/wordchartokenizer.h>
+
+using namespace fsa;
+
+/**
+ * Exercises Base64, NGram, Permuter, Selector and WordCharTokenizer and
+ * prints the results to standard output (ngram_test.sh compares the output
+ * with ngram_test.out).
+ */
+int main(int, char **)
+{
+  Permuter permuter;
+
+  // Two n-grams derived from the original through the permuter; the
+  // numeric arguments are passed through as-is (presumably permutation
+  // ids -- TODO confirm).
+  NGram original("a b c d e f");
+  NGram derived(original,permuter,10);
+  NGram rederived(derived,permuter,13);
+
+  Selector selector;
+
+  std::string plain("this is a test");
+  std::string encoded;
+
+  // Base64 round trip: plain -> encoded -> plain again.
+  Base64::encode(plain,encoded);
+  std::cout << "'" << plain << "'" << std::endl;
+  std::cout << "'" << encoded << "'" << std::endl;
+  Base64::decode(encoded,plain);
+  std::cout << "'" << plain << "'" << std::endl;
+
+
+  std::cout << original << std::endl;
+  std::cout << derived << std::endl;
+  std::cout << rederived << std::endl;
+
+  derived.sort();
+  std::cout << derived << std::endl;
+  derived.reverse();
+  std::cout << derived << std::endl;
+
+  // For every n from 1 to 6, walk the combinations produced by
+  // firstComb/nextComb until 0 is returned, printing each one in hex
+  // together with the terms it selects from the original n-gram.
+  std::cout << std::hex;
+  for(unsigned int n=1;n<=6;n++){
+    for(unsigned int comb=Permuter::firstComb(n,6);
+        comb>0;
+        comb=Permuter::nextComb(comb,6)){
+      selector.clear();
+      selector.set(comb);
+      derived.set(original,selector);
+      std::cout << comb << ": " << derived << std::endl;
+    }
+  }
+  std::cout << std::dec;
+
+  WordCharTokenizer tokenizer(WordCharTokenizer::PUNCTUATION_SMART,"PUNCT");
+
+  NGram tokenized("test, wordchar tokenizer. does it work?",tokenizer);
+
+  std::cout << tokenized.join(" -|- ") << std::endl;
+
+  return 0;
+}
diff --git a/fsa/src/alltest/ngram_test.out b/fsa/src/alltest/ngram_test.out
new file mode 100644
index 00000000000..d826e3173dd
--- /dev/null
+++ b/fsa/src/alltest/ngram_test.out
@@ -0,0 +1,72 @@
+'this is a test'
+'dGhpcyBpcyBhIHRlc3Q='
+'this is a test'
+a b c d e f
+b d a c e f
+a b c d e f
+a b c d e f
+f e d c b a
+1: a
+2: b
+4: c
+8: d
+10: e
+20: f
+3: a b
+5: a c
+6: b c
+9: a d
+a: b d
+c: c d
+11: a e
+12: b e
+14: c e
+18: d e
+21: a f
+22: b f
+24: c f
+28: d f
+30: e f
+7: a b c
+b: a b d
+d: a c d
+e: b c d
+13: a b e
+15: a c e
+16: b c e
+19: a d e
+1a: b d e
+1c: c d e
+23: a b f
+25: a c f
+26: b c f
+29: a d f
+2a: b d f
+2c: c d f
+31: a e f
+32: b e f
+34: c e f
+38: d e f
+f: a b c d
+17: a b c e
+1b: a b d e
+1d: a c d e
+1e: b c d e
+27: a b c f
+2b: a b d f
+2d: a c d f
+2e: b c d f
+33: a b e f
+35: a c e f
+36: b c e f
+39: a d e f
+3a: b d e f
+3c: c d e f
+1f: a b c d e
+2f: a b c d f
+37: a b c e f
+3b: a b d e f
+3d: a c d e f
+3e: b c d e f
+3f: a b c d e f
+test -|- PUNCT -|- wordchar -|- tokenizer -|- PUNCT -|- does -|- it -|- work
diff --git a/fsa/src/alltest/ngram_test.sh b/fsa/src/alltest/ngram_test.sh
new file mode 100755
index 00000000000..85559d6e391
--- /dev/null
+++ b/fsa/src/alltest/ngram_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Runs the ngram test and compares its output with the expected output.
+# Stops at the first failing command so that a crash of the test binary is
+# not hidden by the diff step.
+set -e
+./fsa_ngram_test_app > ngram_test.output
+diff ngram_test.output ngram_test.out
diff --git a/fsa/src/alltest/segmenter_test.cpp b/fsa/src/alltest/segmenter_test.cpp
new file mode 100644
index 00000000000..3b80fe3390e
--- /dev/null
+++ b/fsa/src/alltest/segmenter_test.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file segmenter_test.cpp
+ * @brief Test for the Segmenter class
+ *
+ */
+
+#include <iostream>
+#include <iomanip>
+
+#include <vespa/fsa/segmenter.h>
+
+using namespace fsa;
+
+/**
+ * Segmenter test driver: reads lines from standard input and, for every
+ * line longer than three characters, prints all segments found followed by
+ * the weighted, rightmost-longest and leftmost-longest segmentations.
+ *
+ * @param argv argv[1], when given, is the dictionary file to load;
+ *             otherwise "__testfsa__.__fsa__" is used.
+ */
+int main(int argc, char **argv)
+{
+  FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");
+
+  Segmenter segmenter(dict);
+  Segmenter::Segments segments;
+  const Segmenter::Segmentation *segmentation;
+
+  std::string text;
+  // Loop on the result of getline() rather than on eof(): a stream that
+  // fails without reaching EOF would otherwise keep this loop spinning.
+  while(std::getline(std::cin,text)){
+
+    if(text.size()<=3){
+      continue;
+    }
+
+    segmenter.segment(text,segments);
+
+    std::cout << "List of all segments:" << std::endl;
+    for(unsigned int i=0; i<segments.size(); i++){
+      std::cout << " "
+                << segments.sgm(i) << ":" << segments.conn(i) << " ["
+                << segments.beg(i) << "," << segments.end(i)-1 << "]"
+                << std::endl;
+    }
+
+    segmentation=segments.segmentation(Segmenter::SEGMENTATION_WEIGHTED);
+
+    std::cout << "Weighted segmentation:" << std::endl << " ";
+    for(Segmenter::SegmentationConstIterator it=segmentation->begin();
+        it!=segmentation->end();++it){
+      std::cout << "(" << segments.sgm(*it) << ")";
+    }
+    std::cout << std::endl;
+
+    segmentation=segments.segmentation(Segmenter::SEGMENTATION_RIGHTMOST_LONGEST);
+
+    std::cout << "Rightmost-longest segmentation:" << std::endl << " ";
+    for(Segmenter::SegmentationConstIterator it=segmentation->begin();
+        it!=segmentation->end();++it){
+      std::cout << "(" << segments.sgm(*it) << ")";
+    }
+    std::cout << std::endl;
+
+    segmentation=segments.segmentation(Segmenter::SEGMENTATION_LEFTMOST_LONGEST);
+
+    // The "Lefttmost" spelling is kept on purpose: the expected output in
+    // segmenter_test.out contains exactly this text.
+    std::cout << "Lefttmost-longest segmentation:" << std::endl << " ";
+    for(Segmenter::SegmentationConstIterator it=segmentation->begin();
+        it!=segmentation->end();++it){
+      std::cout << "(" << segments.sgm(*it) << ")";
+    }
+    std::cout << std::endl;
+
+  }
+
+  return 0;
+}
diff --git a/fsa/src/alltest/segmenter_test.out b/fsa/src/alltest/segmenter_test.out
new file mode 100644
index 00000000000..d8c42cfacce
--- /dev/null
+++ b/fsa/src/alltest/segmenter_test.out
@@ -0,0 +1,332 @@
+List of all segments:
+ cupertino:0 [0,0]
+Weighted segmentation:
+ (cupertino)
+Rightmost-longest segmentation:
+ (cupertino)
+Lefttmost-longest segmentation:
+ (cupertino)
+List of all segments:
+ foster:0 [0,0]
+ city:0 [1,1]
+Weighted segmentation:
+ (foster)(city)
+Rightmost-longest segmentation:
+ (foster)(city)
+Lefttmost-longest segmentation:
+ (foster)(city)
+List of all segments:
+ los:0 [0,0]
+ altos:0 [1,1]
+Weighted segmentation:
+ (los)(altos)
+Rightmost-longest segmentation:
+ (los)(altos)
+Lefttmost-longest segmentation:
+ (los)(altos)
+List of all segments:
+ menlo:0 [0,0]
+ park:0 [1,1]
+Weighted segmentation:
+ (menlo)(park)
+Rightmost-longest segmentation:
+ (menlo)(park)
+Lefttmost-longest segmentation:
+ (menlo)(park)
+List of all segments:
+ mountain:0 [0,0]
+ view:0 [1,1]
+Weighted segmentation:
+ (mountain)(view)
+Rightmost-longest segmentation:
+ (mountain)(view)
+Lefttmost-longest segmentation:
+ (mountain)(view)
+List of all segments:
+ palo:0 [0,0]
+ alto:0 [1,1]
+Weighted segmentation:
+ (palo)(alto)
+Rightmost-longest segmentation:
+ (palo)(alto)
+Lefttmost-longest segmentation:
+ (palo)(alto)
+List of all segments:
+ san:0 [0,0]
+ francisco:0 [1,1]
+Weighted segmentation:
+ (san)(francisco)
+Rightmost-longest segmentation:
+ (san)(francisco)
+Lefttmost-longest segmentation:
+ (san)(francisco)
+List of all segments:
+ san:0 [0,0]
+ jose:0 [1,1]
+Weighted segmentation:
+ (san)(jose)
+Rightmost-longest segmentation:
+ (san)(jose)
+Lefttmost-longest segmentation:
+ (san)(jose)
+List of all segments:
+ santa:0 [0,0]
+ clara:0 [1,1]
+Weighted segmentation:
+ (santa)(clara)
+Rightmost-longest segmentation:
+ (santa)(clara)
+Lefttmost-longest segmentation:
+ (santa)(clara)
+List of all segments:
+ saratoga:0 [0,0]
+Weighted segmentation:
+ (saratoga)
+Rightmost-longest segmentation:
+ (saratoga)
+Lefttmost-longest segmentation:
+ (saratoga)
+List of all segments:
+ sunnyvale:0 [0,0]
+Weighted segmentation:
+ (sunnyvale)
+Rightmost-longest segmentation:
+ (sunnyvale)
+Lefttmost-longest segmentation:
+ (sunnyvale)
+List of all segments:
+ apple:1769304646 [0,0]
+Weighted segmentation:
+ (apple)
+Rightmost-longest segmentation:
+ (apple)
+Lefttmost-longest segmentation:
+ (apple)
+List of all segments:
+ apricot:1769304646 [0,0]
+Weighted segmentation:
+ (apricot)
+Rightmost-longest segmentation:
+ (apricot)
+Lefttmost-longest segmentation:
+ (apricot)
+List of all segments:
+ artichoke:1701274966 [0,0]
+Weighted segmentation:
+ (artichoke)
+Rightmost-longest segmentation:
+ (artichoke)
+Lefttmost-longest segmentation:
+ (artichoke)
+List of all segments:
+ banana:1769304646 [0,0]
+Weighted segmentation:
+ (banana)
+Rightmost-longest segmentation:
+ (banana)
+Lefttmost-longest segmentation:
+ (banana)
+List of all segments:
+ cabbage:1701274966 [0,0]
+Weighted segmentation:
+ (cabbage)
+Rightmost-longest segmentation:
+ (cabbage)
+Lefttmost-longest segmentation:
+ (cabbage)
+List of all segments:
+ carrot:1701274966 [0,0]
+Weighted segmentation:
+ (carrot)
+Rightmost-longest segmentation:
+ (carrot)
+Lefttmost-longest segmentation:
+ (carrot)
+List of all segments:
+ cherry:1769304646 [0,0]
+Weighted segmentation:
+ (cherry)
+Rightmost-longest segmentation:
+ (cherry)
+Lefttmost-longest segmentation:
+ (cherry)
+List of all segments:
+ chili:1701274966 [0,0]
+Weighted segmentation:
+ (chili)
+Rightmost-longest segmentation:
+ (chili)
+Lefttmost-longest segmentation:
+ (chili)
+List of all segments:
+ cucumber:1701274966 [0,0]
+Weighted segmentation:
+ (cucumber)
+Rightmost-longest segmentation:
+ (cucumber)
+Lefttmost-longest segmentation:
+ (cucumber)
+List of all segments:
+ eggplant:1701274966 [0,0]
+Weighted segmentation:
+ (eggplant)
+Rightmost-longest segmentation:
+ (eggplant)
+Lefttmost-longest segmentation:
+ (eggplant)
+List of all segments:
+ grapes:1769304646 [0,0]
+Weighted segmentation:
+ (grapes)
+Rightmost-longest segmentation:
+ (grapes)
+Lefttmost-longest segmentation:
+ (grapes)
+List of all segments:
+ lettuce:1701274966 [0,0]
+Weighted segmentation:
+ (lettuce)
+Rightmost-longest segmentation:
+ (lettuce)
+Lefttmost-longest segmentation:
+ (lettuce)
+List of all segments:
+ onion:1701274966 [0,0]
+Weighted segmentation:
+ (onion)
+Rightmost-longest segmentation:
+ (onion)
+Lefttmost-longest segmentation:
+ (onion)
+List of all segments:
+ paprika:1701274966 [0,0]
+Weighted segmentation:
+ (paprika)
+Rightmost-longest segmentation:
+ (paprika)
+Lefttmost-longest segmentation:
+ (paprika)
+List of all segments:
+ passion:0 [0,0]
+ fruit:0 [1,1]
+ passion fruit:1769304646 [0,1]
+Weighted segmentation:
+ (passion fruit)
+Rightmost-longest segmentation:
+ (passion fruit)
+Lefttmost-longest segmentation:
+ (passion fruit)
+List of all segments:
+ peach:1769304646 [0,0]
+Weighted segmentation:
+ (peach)
+Rightmost-longest segmentation:
+ (peach)
+Lefttmost-longest segmentation:
+ (peach)
+List of all segments:
+ pear:1769304646 [0,0]
+Weighted segmentation:
+ (pear)
+Rightmost-longest segmentation:
+ (pear)
+Lefttmost-longest segmentation:
+ (pear)
+List of all segments:
+ pineapple:1769304646 [0,0]
+Weighted segmentation:
+ (pineapple)
+Rightmost-longest segmentation:
+ (pineapple)
+Lefttmost-longest segmentation:
+ (pineapple)
+List of all segments:
+ plum:1769304646 [0,0]
+Weighted segmentation:
+ (plum)
+Rightmost-longest segmentation:
+ (plum)
+Lefttmost-longest segmentation:
+ (plum)
+List of all segments:
+ potato:1701274966 [0,0]
+Weighted segmentation:
+ (potato)
+Rightmost-longest segmentation:
+ (potato)
+Lefttmost-longest segmentation:
+ (potato)
+List of all segments:
+ pumpkin:1701274966 [0,0]
+Weighted segmentation:
+ (pumpkin)
+Rightmost-longest segmentation:
+ (pumpkin)
+Lefttmost-longest segmentation:
+ (pumpkin)
+List of all segments:
+ sour:0 [0,0]
+ cherry:1769304646 [1,1]
+ sour cherry:1769304646 [0,1]
+Weighted segmentation:
+ (sour cherry)
+Rightmost-longest segmentation:
+ (sour cherry)
+Lefttmost-longest segmentation:
+ (sour cherry)
+List of all segments:
+ squash:1701274966 [0,0]
+Weighted segmentation:
+ (squash)
+Rightmost-longest segmentation:
+ (squash)
+Lefttmost-longest segmentation:
+ (squash)
+List of all segments:
+ tomato:1701274966 [0,0]
+Weighted segmentation:
+ (tomato)
+Rightmost-longest segmentation:
+ (tomato)
+Lefttmost-longest segmentation:
+ (tomato)
+List of all segments:
+ alpha:0 [0,0]
+Weighted segmentation:
+ (alpha)
+Rightmost-longest segmentation:
+ (alpha)
+Lefttmost-longest segmentation:
+ (alpha)
+List of all segments:
+ beta:0 [0,0]
+Weighted segmentation:
+ (beta)
+Rightmost-longest segmentation:
+ (beta)
+Lefttmost-longest segmentation:
+ (beta)
+List of all segments:
+ gamma:0 [0,0]
+Weighted segmentation:
+ (gamma)
+Rightmost-longest segmentation:
+ (gamma)
+Lefttmost-longest segmentation:
+ (gamma)
+List of all segments:
+ delta:0 [0,0]
+Weighted segmentation:
+ (delta)
+Rightmost-longest segmentation:
+ (delta)
+Lefttmost-longest segmentation:
+ (delta)
+List of all segments:
+ epsilon:0 [0,0]
+Weighted segmentation:
+ (epsilon)
+Rightmost-longest segmentation:
+ (epsilon)
+Lefttmost-longest segmentation:
+ (epsilon)
diff --git a/fsa/src/alltest/segmenter_test.sh b/fsa/src/alltest/segmenter_test.sh
new file mode 100755
index 00000000000..d36a6d10057
--- /dev/null
+++ b/fsa/src/alltest/segmenter_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_segmenter_test_app < testinput.txt > segmenter_test.output  # run the segmenter test on testinput.txt, capturing its stdout
+diff segmenter_test.output segmenter_test.out  # compare with the expected output; diff's exit status becomes the script's
diff --git a/fsa/src/alltest/testinput.txt b/fsa/src/alltest/testinput.txt
new file mode 100644
index 00000000000..fa4afece710
--- /dev/null
+++ b/fsa/src/alltest/testinput.txt
@@ -0,0 +1,41 @@
+Cupertino
+Foster City
+Los Altos
+Menlo Park
+Mountain View
+Palo Alto
+San Francisco
+San Jose
+Santa Clara
+Saratoga
+Sunnyvale
+apple
+apricot
+artichoke
+banana
+cabbage
+carrot
+cherry
+chili
+cucumber
+eggplant
+grapes
+lettuce
+onion
+paprika
+passion fruit
+pea
+peach
+pear
+pineapple
+plum
+potato
+pumpkin
+sour cherry
+squash
+tomato
+alpha
+beta
+gamma
+delta
+epsilon
diff --git a/fsa/src/alltest/vectorizer_perftest.cpp b/fsa/src/alltest/vectorizer_perftest.cpp
new file mode 100644
index 00000000000..582652ec66d
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_perftest.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file vectorizer_perftest.cpp
+ * @brief Performance test for the Vectorizer class
+ *
+ */
+
+#include <string>
+#include <iostream>
+#include <iomanip>
+
+#include <vespa/fsa/vectorizer.h>
+#include <vespa/fsa/timestamp.h>
+
+using namespace fsa;
+
+/**
+ * Vectorizer performance test: vectorize a fixed, pre-tokenized text 1000
+ * times, report the elapsed time and throughput, then print the
+ * "term, weight" items of the resulting term vector. argv[1], if given, names
+ * the FSA file to load instead of "__testfsa__.__fsa__". Always returns 0.
+ */
+int main(int argc, char **argv)
+{
+  const char *fsaFile = (argc<2) ? "__testfsa__.__fsa__" : argv[1];
+  FSA dict(fsaFile);
+  Vectorizer vectorizer(dict);
+  Vectorizer::TermVector terms;
+
+  std::string text =
+    "belfast northern ireland protestant extremists crashed a forklift "
+    "truck into a belfast pub packed with catholics early friday and tossed "
+    "gasoline bombs into the building on a road on the front line of "
+    "tensions between the two communities "
+    "no one was hurt in the attack police said, though the forklift came "
+    "crashing through a window just above a bench where a patron had been "
+    "sitting seconds earlier the bar s owner sean conlon said "
+    "the customer had just gotten up to go to the toilet so it s really "
+    "just by the grace of god still he s here today at all conlon said "
+    "a protestant gang used the stolen vehicle to smash down a heavy metal "
+    "security grill on a window at around 12 45 a m then to toss three "
+    "gasoline bombs inside the pub on the crumlin road an especially "
+    "polarized part of north belfast where catholic protestant tensions "
+    "have repeatedly flared "
+    "no group claimed responsibility for the attack on the thirty two "
+    "degrees north pub a catholic frequented bar across the street from a "
+    "hard line protestant district but catholic leaders blamed the largest "
+    "illegal protestant group the ulster defense association "
+    "firefighters quickly doused the flames caused by the gasoline "
+    "bombs the forklift remained wedged into the pub friday afternoon as "
+    "engineers and architects discussed whether the newly refurbished pub "
+    "would have to be partly demolished "
+    "the uda is supposed to be observing a cease fire in support of "
+    "northern ireland s 1998 peace accord but britain no longer recognizes "
+    "the validity of the uda truce because the anti catholic group has "
+    "violated it so often "
+    "the crumlin road area of north belfast has suffered some of northern "
+    "ireland s most graphic sectarian trouble in recent years while both "
+    "sides complain of suffering harassment and stone throwing protestants "
+    "in particular accuse the expanding catholic community of seeking to "
+    "force them from the area a charge the catholics deny. "
+    "protestant mobs in 2001 and 2002 blocked catholics from taking their "
+    "children to the local catholic elementary school which is in the "
+    "predominantly protestant part of the area "
+    "on july 12 hundreds of catholics from the area s ardoyne district "
+    "swarmed over police and british soldiers protecting a protestant "
+    "parade that had just passed down crumlin road dozens were wounded "
+    "demographic tensions lie at the heart of the northern ireland "
+    "conflict which was founded 84 years ago as a british territory with a "
+    "70 percent protestant majority the most recent census in 2001 put the "
+    "sectarian split at nearer 55 percent protestant and 45 percent "
+    "catholic and confirmed that belfast now has a catholic majority";
+  NGram tokenized_text(text);
+  TimeStamp timer;
+  const unsigned int iterations=1000;
+  std::cout << "Number of iterations: " << iterations << std::endl;
+  std::cout << "Input string length: " << text.length() << std::endl;
+  std::cout << "Number of input tokens: " << tokenized_text.length() << std::endl
+            << std::endl;
+  // Only the vectorize() calls are timed; tokenization already happened above.
+  const double start=timer.elapsed();
+  for(unsigned int left=iterations; left>0; --left){
+    vectorizer.vectorize(tokenized_text,terms);
+  }
+  const double elapsed=timer.elapsed()-start;
+  std::cout << "Vectorizer performance: \t" << elapsed << " sec" << "\t\t"
+            << iterations/elapsed << " document/sec" << std::endl;
+  for(unsigned int idx=0; idx!=terms.size(); ++idx){
+    std::cout << terms[idx].term() << ", " << terms[idx].weight() << std::endl;
+  }
+  return 0;
+}
diff --git a/fsa/src/alltest/vectorizer_test.cpp b/fsa/src/alltest/vectorizer_test.cpp
new file mode 100644
index 00000000000..e3bcf236455
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_test.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file vectorizer_test.cpp
+ * @brief Test for the vectorizer class
+ *
+ */
+
+#include <iostream>
+#include <iomanip>
+
+#include <vespa/fsa/vectorizer.h>
+
+using namespace fsa;
+
+/**
+ * Vectorizer test driver: vectorize each stdin line against the dictionary
+ * (argv[1] if given, else "__testfsa__.__fsa__") and print one
+ * "term, weight" line per item of the resulting term vector. Returns 0.
+ */
+int main(int argc, char **argv)
+{
+  FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");
+  Vectorizer v(dict);
+  Vectorizer::TermVector tv;
+  std::string text;
+  NGram tokenized_text;
+  // Loop on the state returned by getline() instead of polling eof(), so a
+  // failed read ends the loop rather than being vectorized as a line.
+  while(std::getline(std::cin,text)){
+    tokenized_text.set(text);
+    v.vectorize(tokenized_text,tv);
+    for(unsigned int i=0; i<tv.size(); i++){
+      std::cout << tv[i].term() << ", " << tv[i].weight() << std::endl;
+    }
+  }
+  return 0;
+}
diff --git a/fsa/src/alltest/vectorizer_test.out b/fsa/src/alltest/vectorizer_test.out
new file mode 100644
index 00000000000..aa30421a2bf
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_test.out
@@ -0,0 +1,26 @@
+apple, 0
+apricot, 0
+artichoke, 0
+banana, 0
+cabbage, 0
+carrot, 0
+cherry, 0
+chili, 0
+cucumber, 0
+eggplant, 0
+grapes, 0
+lettuce, 0
+onion, 0
+paprika, 0
+passion fruit, 0
+pea, 0
+peach, 0
+pear, 0
+pineapple, 0
+plum, 0
+potato, 0
+pumpkin, 0
+cherry, 0
+sour cherry, 0
+squash, 0
+tomato, 0
diff --git a/fsa/src/alltest/vectorizer_test.sh b/fsa/src/alltest/vectorizer_test.sh
new file mode 100755
index 00000000000..03d794fc6e8
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_vectorizer_test_app < testinput.txt > vectorizer_test.output  # run the vectorizer test on testinput.txt, capturing its stdout
+diff vectorizer_test.output vectorizer_test.out  # compare with the expected output; diff's exit status becomes the script's