aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
blob: 381dab9c844ec4d6a17c27b5537f62f621e5e31c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "collectiontype.h"
#include "basictype.h"
#include <vespa/searchcommon/common/iblobconverter.h>
#include <vespa/vespalib/datastore/atomic_entry_ref.h>
#include <vespa/vespalib/util/arrayref.h>
#include <ostream>
#include <vector>

namespace search {
    struct IDocumentWeightAttribute;
    class QueryTermSimple;
}

namespace search::tensor {
    class ITensorAttribute;
}

namespace search::attribute {

class IMultiValueAttribute;
class ISearchContext;
class SearchContextParams;

/**
 * This class is used to store a value and a weight.
 * It is used when getting content from a weighted set attribute vector.
 *
 * @param T the type of the value stored in this object
 **/
template <typename T>
class WeightedType
{
private:
    T       _value;
    int32_t _weight;

public:
    WeightedType() noexcept : _value(T()), _weight(1) { }
    WeightedType(T value_, int32_t weight_ = 1) noexcept : _value(value_), _weight(weight_) { }
    const T & getValue() const { return _value; }
    const T & value() const { return _value; }
    void setValue(const T & v) { _value = v; }
    int32_t getWeight()  const { return _weight; }
    int32_t weight()  const { return _weight; }
    void setWeight(int32_t w)  { _weight = w; }
    bool operator==(const WeightedType & rhs) const {
        return _value == rhs._value && _weight == rhs._weight;
    }
};

template <typename T>
std::ostream&
operator<<(std::ostream& os, const WeightedType<T>& value)
{
    os << "{" << value.value() << "," << value.weight() << "}";
    return os;
}

/**
 * This is a read interface used to access the content of an attribute vector.
 **/
class IAttributeVector
{
public:
    using SP = std::shared_ptr<IAttributeVector>;
    using DocId = uint32_t;
    using EnumHandle = uint32_t;
    using largeint_t = int64_t;
    using WeightedFloat = WeightedType<double>;
    using WeightedInt = WeightedType<largeint_t>;
    using WeightedEnum = WeightedType<EnumHandle>;
    using WeightedConstChar = WeightedType<const char *>;
    using WeightedString = WeightedType<vespalib::string>;
    using EnumRefs = vespalib::ConstArrayRef<vespalib::datastore::AtomicEntryRef>;

    /**
     * Returns the name of this attribute vector.
     *
     * @return attribute name
     **/
    virtual const vespalib::string & getName() const = 0;

    vespalib::stringref getNamePrefix() const {
        vespalib::stringref name = getName();
        return name.substr(0, name.find('.'));
    }

    /**
     * Returns the number of documents stored in this attribute vector.
     *
     * @return number of documents
     **/
    virtual uint32_t getNumDocs() const = 0;

    /**
     * Returns the number of values stored for the given document.
     *
     * @return number of values
     * @param doc document identifier
     **/
    virtual uint32_t getValueCount(uint32_t doc) const = 0;

    /**
     * Returns the maximum number of values stored for any document.
     *
     * @return maximum number of values
     **/
    virtual uint32_t getMaxValueCount() const = 0;

    /**
     * Returns the first value stored for the given document as an integer.
     *
     * @param docId document identifier
     * @return the integer value
     **/
    virtual largeint_t getInt(DocId doc) const = 0;

    /**
     * Returns the first value stored for the given document as a floating point number.
     *
     * @param docId document identifier
     * @return the floating point value
     **/
    virtual double getFloat(DocId doc)   const = 0;

    /**
     * Return raw value.
     *
     * TODO: Consider accessing via new IRawAttribute interface class.
     */
    virtual vespalib::ConstArrayRef<char> get_raw(DocId doc) const = 0;

    /**
     * Returns the first value stored for the given document as an enum value.
     *
     * @param docId document identifier
     * @return the enum value
     **/
    virtual EnumHandle getEnum(DocId doc)   const = 0;

    /**
     * Copies the values stored for the given document into the given buffer.
     *
     * @param docId document identifier
     * @param buffer content buffer to copy integer values into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, largeint_t * buffer, uint32_t sz) const = 0;

    /**
     * Copies the values stored for the given document into the given buffer.
     *
     * @param docId document identifier
     * @param buffer content buffer to copy floating point values into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, double * buffer, uint32_t sz) const = 0;

    /**
     * Copies the values stored for the given document into the given buffer.
     *
     * @param docId document identifier
     * @param buffer content buffer to copy string values into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
//    virtual uint32_t get(DocId docId, vespalib::string * buffer, uint32_t sz) const = 0;

    /**
     * Copies the values stored for the given document into the given buffer.
     *
     * @param docId document identifier
     * @param buffer content buffer to copy const char values into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, const char ** buffer, uint32_t sz) const = 0;

    /**
     * Copies the enum values stored for the given document into the given buffer.
     *
     * @param docId document identifier
     * @param buffer content object to copy enum into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, EnumHandle * buffer, uint32_t sz) const = 0;

    /**
     * Copies the values and weights stored for the given document into the given buffer.
     * This method should only be invoked if @ref getCollectionType(docId) returns CollectionType::WEIGHTED_SET.
     *
     * @param docId document identifier
     * @param buffer content object to copy integer values and weights into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, WeightedInt * buffer, uint32_t sz) const = 0;

    /**
     * Copies the values and weights stored for the given document into the given buffer.
     * This method should only be invoked if @ref getCollectionType(docId) returns CollectionType::WEIGHTED_SET.
     *
     * @param docId document identifier
     * @param buffer content object to copy floating point values and weights into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, WeightedFloat * buffer, uint32_t sz) const = 0;

    /**
     * Copies the values and weights stored for the given document into the given buffer.
     * This method should only be invoked if @ref getCollectionType(docId) returns CollectionType::WEIGHTED_SET.
     *
     * @param docId document identifier
     * @param buffer content object to copy string values and weights into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, WeightedString * buffer, uint32_t sz) const = 0;

    /**
     * Copies the values and weights stored for the given document into the given buffer.
     * This method should only be invoked if @ref getCollectionType(docId) returns CollectionType::WEIGHTED_SET.
     *
     * @param docId document identifier
     * @param buffer content object to copy const char values and weights into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, WeightedConstChar * buffer, uint32_t sz) const = 0;

    /**
     * Copies the enum values and weights stored for the given document into the given buffer.
     * This method should only be invoked if @ref getCollectionType(docId) returns CollectionType::WEIGHTED_SET.
     *
     * @param docId document identifier
     * @param buffer content object to copy enum values and weights into
     * @param sz the size of the buffer
     * @return the number of values for this document
     **/
    virtual uint32_t get(DocId docId, WeightedEnum * buffer, uint32_t sz) const = 0;

    /**
     * Finds the enum value for the given string value.
     * This method will only have effect if @ref getBasicType() returns BasicType::STRING and
     * @ref hasEnum() returns true.
     *
     * @param value the string value to lookup.
     * @param e the handle in which to store the enum value.
     * @return true if found.
     **/
    virtual bool findEnum(const char * value, EnumHandle & e) const = 0;

    /**
     * Finds all enum values matching the given string value.
     * This method will only have effect if @ref getBasicType() returns BasicType::STRING and
     * @ref hasEnum() returns true.
     *
     * @param value the string value to lookup.
     * @return vector of EnumHandles, size 0 if no match found.
     **/
    virtual std::vector<EnumHandle> findFoldedEnums(const char * value) const = 0;

    /**
     * Given an enum handle, returns the string it refers to.
     * This method will only have effect if @ref getBasicType() returns BasicType::STRING and
     * @ref hasEnum() returns true.
     *
     * Effectively functions as the inverse of @ref findEnum(value, handle)
     *
     * @param e a valid enum handle
     * @return enum string value, or nullptr if attribute type does
     *         not support enum handle lookups.
     */
    virtual const char * getStringFromEnum(EnumHandle e) const = 0;

    /**
     * Creates a context for searching this attribute with the given term.
     * The search context is used to create the actual search iterator.
     *
     * @param term the term to search for.
     * @param params optional bitvector and diversity settings for the search.
     * @return the search context.
     **/
    virtual std::unique_ptr<ISearchContext> createSearchContext(std::unique_ptr<QueryTermSimple> term,
                                                                const SearchContextParams &params) const = 0;

    /**
     * Type-safe down-cast to an attribute supporting direct document weight iterators.
     *
     * @return document weight attribute or nullptr if not supported.
     */
    virtual const IDocumentWeightAttribute *asDocumentWeightAttribute() const = 0;

    /**
     * Type-safe down-cast to a tensor attribute.
     *
     * @return tensor attribute or nullptr if not supported.
     */
    virtual const tensor::ITensorAttribute *asTensorAttribute() const = 0;

    /**
     * Type-safe down-cast to a multi-value attribute.
     *
     * @return multi-value attribute or nullptr if not supported.
     */
    virtual const IMultiValueAttribute* as_multi_value_attribute() const = 0;

    /**
     * Returns the basic type of this attribute vector.
     *
     * @return basic type
     **/
    virtual BasicType::Type getBasicType() const = 0;

    /**
     * Returns the number of bytes a single value in this attribute occupies.
     **/
    virtual size_t getFixedWidth() const = 0;

    /**
     * Returns the collection type of this attribute vector.
     *
     * @return collection type
     **/
    virtual CollectionType::Type getCollectionType() const = 0;

    /**
     * Returns whether this is an integer attribute.
     **/
    virtual bool isIntegerType() const {
        BasicType::Type t = getBasicType();
        return t == BasicType::BOOL ||
               t == BasicType::UINT2 ||
               t == BasicType::UINT4 ||
               t == BasicType::INT8 ||
               t == BasicType::INT16 ||
               t == BasicType::INT32 ||
               t == BasicType::INT64;
    }

    /**
     * Returns whether this is a floating point attribute.
     **/
    virtual bool isFloatingPointType() const {
        BasicType::Type t = getBasicType();
        return t == BasicType::FLOAT || t == BasicType::DOUBLE;
    }

    /**
     * Returns whether this is a string attribute.
     **/
    virtual bool isStringType() const {
        return getBasicType() == BasicType::STRING;
    }


    virtual bool isPredicateType() const { return getBasicType() == BasicType::PREDICATE; }
    virtual bool isTensorType() const { return getBasicType() == BasicType::TENSOR; }
    virtual bool isReferenceType() const { return getBasicType() == BasicType::REFERENCE; }
    virtual bool is_raw_type() const noexcept {
        BasicType::Type t = getBasicType();
        return t == BasicType::RAW ||
               t == BasicType::STRING;
    }

    /**
     * Returns whether this is a multi value attribute.
     **/
    virtual bool hasMultiValue() const {
        return getCollectionType() != CollectionType::SINGLE;
    }

    /**
     * Returns whether this is a weighted set attribute.
     **/
    virtual bool hasWeightedSetType() const {
        return getCollectionType() == CollectionType::WSET;
    }

    /**
     * Returns whether this attribute vector has underlying enum values.
     *
     * @return true if it has enum values.
     **/
    virtual bool hasEnum() const = 0;

    /**
     * Returns whether the attribute vector is a filter attribute.
     *
     * @return true if attribute vector is a filter attribute.
     */
    virtual bool getIsFilter() const = 0;

    /**
     * Returns whether the attribute vector is marked as fast search.
     *
     * @return true if attribute vector is marked as fast search.
     */
    virtual bool getIsFastSearch() const = 0;

    /**
     * Returns the committed docid limit for the attribute.
     *
     * @return committed docid limit for the attribute.
     */
    virtual uint32_t getCommittedDocIdLimit() const = 0;

    /*
     * Returns whether the current attribute vector is an imported attribute
     * vector.
     */
    virtual bool isImported() const = 0;

    /**
     * Will serialize the values for the documentid in ascending order. The serialized form can be used by memcmp and
     * sortorder will be preserved.
     * @param doc The document id to serialize for.
     * @param serTo The buffer to serialize into.
     * @param available. Number of bytes available in the serialization buffer.
     * @param bc An optional converter to use.
     * @return The number of bytes serialized, -1 if not enough space.
     */
    long serializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc=nullptr) const {
        return onSerializeForAscendingSort(doc, serTo, available, bc);
    }
    /**
     * Will serialize the values for the documentid in descending order. The serialized form can be used by memcmp and
     * sortorder will be preserved.
     * @param doc The document id to serialize for.
     * @param serTo The buffer to serialize into.
     * @param available. Number of bytes available in the serialization buffer.
     * @param bc An optional converter to use.
     * @return The number of bytes serialized, -1 if not enough space.
     */
    long serializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc=nullptr) const {
        return onSerializeForDescendingSort(doc, serTo, available, bc);
    }

    /**
     * Virtual destructor to allow safe subclassing.
     **/
    virtual ~IAttributeVector() = default;

    /**
     * This method is used to simulate sparseness in the single value attributes.
     * @param doc The document id to verify if attribute has a undefined value for this document.
     * @return true if value is undefined.
     */
    virtual bool isUndefined(DocId doc) const { (void) doc; return false; }

    /**
     * Will return a readonly view of any single value enumeration. If not applicable an empty one will be returned.
     *
     * @return returns a readonly enumrefs view with entries equal to number of docs committed.
     */
    virtual EnumRefs make_enum_read_view() const noexcept {
        return EnumRefs();
    }

private:
    virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const = 0;
    virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const = 0;

};

}