aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib/src/tests/text/stringtokenizer/stringtokenizer_test.cpp
blob: 67e76dbd60b48a46369c80e84e88534436e3783b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/log/log.h>
LOG_SETUP("stringtokenizer_test");
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/text/stringtokenizer.h>
#include <set>

using namespace vespalib;

// Sets up the test application type `Test`; its Main() entry point is defined below.
// NOTE(review): the exact expansion comes from the testkit header — confirm there.
TEST_SETUP(Test);

int
Test::Main()
{
    TEST_INIT("stringtokenizer_test");
    {
        string s("This,is ,a,,list ,\tof,,sepa rated\n, \rtokens,");
        StringTokenizer tokenizer(s);
        std::vector<string> result;
        result.push_back("This");
        result.push_back("is");
        result.push_back("a");
        result.push_back("");
        result.push_back("list");
        result.push_back("of");
        result.push_back("");
        result.push_back("sepa rated");
        result.push_back("tokens");
        result.push_back("");

        EXPECT_EQUAL(result.size(),
                             static_cast<size_t>(tokenizer.size()));
        for (unsigned int i=0; i<result.size(); i++)
            EXPECT_EQUAL(result[i], tokenizer[i]);
        std::set<string> sorted(tokenizer.begin(), tokenizer.end());
        EXPECT_EQUAL(static_cast<size_t>(8u), sorted.size());

        tokenizer.removeEmptyTokens();
        EXPECT_EQUAL(7u, tokenizer.size());
    }
    {
        string s("\tAnother list with some \ntokens, and stuff.");
        StringTokenizer tokenizer(s, " \t\n", ",.");
        std::vector<string> result;
        result.push_back("");
        result.push_back("Another");
        result.push_back("list");
        result.push_back("with");
        result.push_back("some");
        result.push_back("");
        result.push_back("tokens");
        result.push_back("and");
        result.push_back("stuff");

        EXPECT_EQUAL(result.size(),
                             static_cast<size_t>(tokenizer.size()));
        for (unsigned int i=0; i<result.size(); i++)
            EXPECT_EQUAL(result[i], tokenizer[i]);
        std::set<string> sorted(tokenizer.begin(), tokenizer.end());
        EXPECT_EQUAL(static_cast<size_t>(8u), sorted.size());

        tokenizer.removeEmptyTokens();
        EXPECT_EQUAL(7u, tokenizer.size());
    }
    {
        string s(" ");
        StringTokenizer tokenizer(s);
        EXPECT_EQUAL(0u, tokenizer.size());
    }
    TEST_DONE();
}