aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/test/string_field_builder/string_field_builder_test.cpp
blob: 9d886e6cde767ed3dd00ca61d064a486808aca64 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/searchlib/test/string_field_builder.h>
#include <vespa/document/annotation/annotation.h>
#include <vespa/document/annotation/span.h>
#include <vespa/document/annotation/spanlist.h>
#include <vespa/document/annotation/spantree.h>
#include <vespa/document/datatype/annotationtype.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
#include <vespa/searchlib/test/doc_builder.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <cassert>
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

using document::Annotation;
using document::AnnotationType;
using document::Span;
using document::SpanNode;
using document::SpanTree;
using document::StringFieldValue;
using search::test::DocBuilder;
using search::test::StringFieldBuilder;

namespace
{

// Name of the span tree that get_annotations() looks up in a string field value.
const vespalib::string SPANTREE_NAME("linguistics");

/**
 * Flattened description of one annotation, used to compare the expected
 * annotations of a test case against those extracted from a field value.
 */
struct MyAnnotation {
    int32_t start;                          // first position covered by the span
    int32_t length;                         // number of positions covered by the span
    std::optional<vespalib::string> label;  // set only when the annotation carries a value

    /** Annotation covering [start_in, start_in + length_in) without a label. */
    MyAnnotation(int32_t start_in, int32_t length_in) noexcept
        : start(start_in),
          length(length_in),
          label()
    {
    }

    /**
     * Annotation covering [start_in, start_in + length_in) with a label.
     *
     * label_in is a by-value sink parameter, so it is moved into the member.
     * Copying it here would be a redundant second copy, and an allocation
     * failure during that copy would hit the noexcept boundary.
     */
    MyAnnotation(int32_t start_in, int32_t length_in, vespalib::string label_in) noexcept
        : start(start_in),
          length(length_in),
          label(std::move(label_in))
    {
    }

    /** Member-wise equality; defined out of line below the struct. */
    bool operator==(const MyAnnotation& rhs) const noexcept;
};

// Two annotations are equal when span start, span length and the optional
// label all match.
bool
MyAnnotation::operator==(const MyAnnotation& rhs) const noexcept
{
    if (start != rhs.start || length != rhs.length) {
        return false;
    }
    return label == rhs.label;
}


// Prints an annotation as [start,length], followed by ("label") when a label
// is present.
std::ostream& operator<<(std::ostream& os, const MyAnnotation& ann) {
    os << "[" << ann.start;
    os << "," << ann.length;
    os << "]";
    if (ann.label) {
        os << "(\"" << *ann.label << "\")";
    }
    return os;
}

}

/**
 * Fixture shared by all test cases in this file. It owns a DocBuilder and a
 * StringFieldBuilder constructed from it.
 */
class StringFieldBuilderTest : public testing::Test
{
protected:
    // Declaration order matters: sfb is constructed from db.
    DocBuilder         db;
    StringFieldBuilder sfb;

    StringFieldBuilderTest();
    ~StringFieldBuilderTest() override;

    /** Extracts the annotations found in the "linguistics" span tree of val. */
    std::vector<MyAnnotation> get_annotations(const StringFieldValue& val);

    /** Checks both the annotations and the plain text of val. */
    void assert_annotations(std::vector<MyAnnotation> exp,
                            const vespalib::string& plain,
                            const StringFieldValue& val);
};

// The testing::Test base is default-constructed implicitly. db is declared
// before sfb in the class, so it is fully constructed when passed to sfb.
StringFieldBuilderTest::StringFieldBuilderTest()
    : db(), sfb(db)
{
}

// Defined out of line; nothing beyond member destruction is needed.
StringFieldBuilderTest::~StringFieldBuilderTest() = default;

std::vector<MyAnnotation>
StringFieldBuilderTest::get_annotations(const StringFieldValue& val)
{
    std::vector<MyAnnotation> result;
    StringFieldValue::SpanTrees trees = val.getSpanTrees();
    const auto* tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
    if (tree != nullptr) {
        for (auto& ann : *tree) {
            assert(ann.getType() == *AnnotationType::TERM);
            auto span = dynamic_cast<const Span *>(ann.getSpanNode());
            if (span == nullptr) {
                continue;
            }
            auto ann_fv = ann.getFieldValue();
            if (ann_fv == nullptr) {
                result.emplace_back(span->from(), span->length());
            } else {
                result.emplace_back(span->from(), span->length(), dynamic_cast<const StringFieldValue &>(*ann_fv).getValue());
            }
        }
    }
    return result;
}

// Verifies a built string field value:
//   exp   - annotations expected from get_annotations(val), in order
//   plain - expected plain text of the value
//   val   - the field value under test
// Both checks use EXPECT_EQ, so a mismatch in the annotations does not stop
// the plain-text comparison from running.
void
StringFieldBuilderTest::assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val)
{
    EXPECT_EQ(exp, get_annotations(val));
    EXPECT_EQ(plain, val.getValue());
}

// A string field value constructed directly from text has no annotations.
TEST_F(StringFieldBuilderTest, no_annotations)
{
    const std::vector<MyAnnotation> expected;
    assert_annotations(expected, "foo", StringFieldValue("foo"));
}

// A single word is expected to give one annotation spanning the whole text.
TEST_F(StringFieldBuilderTest, single_word)
{
    const std::vector<MyAnnotation> expected{{0, 4}};
    assert_annotations(expected, "word", sfb.word("word").build());
}

// tokenize() is expected to give one [start,length] annotation per word of
// the input text.
TEST_F(StringFieldBuilderTest, tokenize)
{
    const std::vector<MyAnnotation> expected{
        {0, 4},
        {5, 2},
        {8, 1},
        {10, 4}
    };
    assert_annotations(expected, "this is a test", sfb.tokenize("this is a test").build());
}

// alt_word() is expected to add a second annotation over the same span as the
// preceding word, carrying the alternative text as its label.
TEST_F(StringFieldBuilderTest, alt_word)
{
    const std::vector<MyAnnotation> expected{
        {0, 3},
        {4, 3},
        {4, 3, "baz"}
    };
    assert_annotations(expected, "foo bar", sfb.word("foo").space().word("bar").alt_word("baz").build());
}

// Test program entry point. NOTE(review): the macro is defined outside this
// file (presumably in vespa/vespalib/gtest/gtest.h) and presumably expands to
// a main() that runs all registered tests — confirm against that header.
GTEST_MAIN_RUN_ALL_TESTS()