diff options
Diffstat (limited to 'searchlib/src/vespa/searchlib/memoryindex')
49 files changed, 80 insertions, 207 deletions
diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt index 34ac7d8e905..a093d6ae2e8 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchlib_memoryindex OBJECT SOURCES bundled_fields_context.cpp diff --git a/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.cpp index 4f9e88b323e..8950b8711ef 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "bundled_fields_context.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.h b/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.h index c058c14832d..b3795432b12 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.h +++ b/searchlib/src/vespa/searchlib/memoryindex/bundled_fields_context.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.cpp index 59df4a731d2..6f4f847210d 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "compact_words_store.h" #include <vespa/vespalib/datastore/datastore.hpp> diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.h b/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.h index d90c04dc5b6..41e1ed8c18c 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/compact_words_store.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once #include <vespa/vespalib/datastore/datastore.h> diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp index c55de3890cd..0e52bf93369 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_inverter.h" #include "document_inverter_context.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h index d89bdad5bb8..c389e76b985 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.cpp index d9b27735489..7f5b31d7315 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_inverter_collection.h" #include "document_inverter.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.h index d07cca67e08..671f604e4a2 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp index 93a12c24257..d051f72f1a8 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_inverter_context.h" #include <cassert> diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h index 552def934c2..793ff46ca2a 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp index 4bc7f5b1144..035bbc71644 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "feature_store.h" #include <vespa/searchlib/index/schemautil.h> diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h index 1e48189987e..5beb535abdd 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 199e9a4b8a0..18b5d749aeb 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "field_index.h" #include "ordered_field_index_inserter.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 187ec5ee971..9ae9d1b2aef 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp index ec4023a95e7..dd9cba849bc 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "field_index_base.h" #include "i_ordered_field_index_inserter.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h index 2d6d367af3b..3da98181f3c 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp index c606b9b6340..bd933bb118f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "field_index_collection.h" #include "field_inverter.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h index a9f597e6296..6736ed2c2ad 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.cpp index 3cdf26d09ff..2b6e5e2a358 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "field_index_remover.h" #include "i_field_index_remove_listener.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.h index 429eea038c9..0bd9aa786b9 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_remover.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once #include "compact_words_store.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp index 8d23b235b07..a69260c6f45 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp @@ -1,13 +1,9 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "field_inverter.h" #include "ordered_field_index_inserter.h" -#include <vespa/document/annotation/alternatespanlist.h> #include <vespa/document/annotation/annotation.h> #include <vespa/document/annotation/span.h> -#include <vespa/document/annotation/spanlist.h> -#include <vespa/document/annotation/spantree.h> -#include <vespa/document/annotation/spantreevisitor.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/document.h> #include <vespa/document/fieldvalue/stringfieldvalue.h> @@ -25,14 +21,9 @@ #include <vespa/vespalib/stllike/hash_map.hpp> #include <stdexcept> -#include <vespa/log/log.h> -LOG_SETUP(".searchlib.memoryindex.fieldinverter"); - namespace search::memoryindex { -using document::AlternateSpanList; using document::Annotation; -using document::AnnotationType; using document::ArrayFieldValue; using document::DataType; using document::Document; @@ -40,130 +31,34 @@ using document::DocumentType; using document::Field; using document::FieldValue; using document::IntFieldValue; -using document::SimpleSpanList; using document::Span; -using document::SpanList; -using document::SpanNode; -using document::SpanTree; -using document::SpanTreeVisitor; using document::StringFieldValue; using document::StructFieldValue; using document::WeightedSetFieldValue; using index::DocIdAndPosOccFeatures; using index::Schema; using search::index::schema::CollectionType; +using search::linguistics::TokenExtractor; using search::util::URL; using vespalib::make_string; using vespalib::datastore::Aligner; -namespace documentinverterkludge::linguistics { - -const vespalib::string SPANTREE_NAME("linguistics"); - -} - -using namespace documentinverterkludge; - -namespace { - -class SpanFinder : public SpanTreeVisitor { -public: - int32_t begin_pos; - int32_t end_pos; - - SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {} - Span span() { return Span(begin_pos, end_pos - begin_pos); } - - void visit(const Span &node) override { - begin_pos = std::min(begin_pos, node.from()); - end_pos = std::max(end_pos, node.from() + node.length()); - } - void visit(const SpanList &node) override { - for (const auto & span_ : node) { - const_cast<SpanNode *>(span_)->accept(*this); - } - } - void visit(const SimpleSpanList &node) override { - for (const auto & span_ : node) { - const_cast<Span &>(span_).accept(*this); - } - } - void visit(const AlternateSpanList &node) override { - for (size_t i = 0; i < node.getNumSubtrees(); ++i) { - visit(node.getSubtree(i)); - } - } -}; - -Span -getSpan(const SpanNode &span_node) -{ - SpanFinder finder; - // The SpanNode will not be changed. - const_cast<SpanNode &>(span_node).accept(finder); - return finder.span(); -} - -} - void FieldInverter::processAnnotations(const StringFieldValue &value, const Document& doc) { _terms.clear(); - StringFieldValue::SpanTrees spanTrees = value.getSpanTrees(); - const SpanTree *tree = StringFieldValue::findTree(spanTrees, linguistics::SPANTREE_NAME); - if (tree == nullptr) { - /* This is wrong unless field is exact match */ - const vespalib::string &text = value.getValue(); - if (text.empty()) { - return; - } - uint32_t wordRef = saveWord(text, &doc); - if (wordRef != 0u) { - add(wordRef); - stepWordPos(); - } - return; - } - const vespalib::string &text = value.getValue(); - for (const Annotation & annotation : *tree) { - const SpanNode *span = annotation.getSpanNode(); - if ((span != nullptr) && annotation.valid() && - (annotation.getType() == *AnnotationType::TERM)) - { - Span sp = getSpan(*span); - if (sp.length() != 0) { - _terms.push_back(std::make_pair(sp, - annotation.getFieldValue())); - } - } - } - std::sort(_terms.begin(), _terms.end()); + auto span_trees = value.getSpanTrees(); + vespalib::stringref text = value.getValueRef(); + _token_extractor.extract(_terms, span_trees, text, &doc); auto it = _terms.begin(); auto ite = _terms.end(); - uint32_t wordRef; - bool mustStep = false; for (; it != ite; ) { auto it_begin = it; - for (; it != ite && it->first == it_begin->first; ++it) { - if (it->second) { // it->second is a const FieldValue *. - wordRef = saveWord(*it->second, doc); - } else { - const Span &iSpan = it->first; - assert(iSpan.from() >= 0); - assert(iSpan.length() > 0); - wordRef = saveWord(vespalib::stringref(&text[iSpan.from()], - iSpan.length()), &doc); - } - if (wordRef != 0u) { - add(wordRef); - mustStep = true; - } - } - if (mustStep) { - stepWordPos(); - mustStep = false; + for (; it != ite && it->span == it_begin->span; ++it) { + uint32_t wordRef = saveWord(it->word); + add(wordRef); } + stepWordPos(); } } @@ -244,33 +139,19 @@ FieldInverter::endElement() } uint32_t -FieldInverter::saveWord(const vespalib::stringref word, const Document* doc) +FieldInverter::saveWord(vespalib::stringref word) { const size_t wordsSize = _words.size(); // assert((wordsSize & 3) == 0); // Check alignment - size_t len = strnlen(word.data(), word.size()); - if (len < word.size()) { - const Schema::IndexField &field = _schema.getIndexField(_fieldId); - LOG(error, "Detected NUL byte in word, length reduced from %zu to %zu, lid is %u, field is %s, truncated word is %s", word.size(), len, _docId, field.getName().c_str(), word.data()); - } - if (len > max_word_len && doc != nullptr) { - const Schema::IndexField& field = _schema.getIndexField(_fieldId); - LOG(warning, "Dropped too long word (len %zu > max len %zu) from document %s field %s, word prefix is %.100s", len, max_word_len, doc->getId().toString().c_str(), field.getName().c_str(), word.data()); - return 0u; - } - if (len == 0) { - return 0u; - } - - const size_t unpadded_size = wordsSize + 4 + len + 1; + const size_t unpadded_size = wordsSize + 4 + word.size() + 1; const size_t fullyPaddedSize = Aligner<4>::align(unpadded_size); _words.reserve(vespalib::roundUp2inN(fullyPaddedSize)); _words.resize(fullyPaddedSize); char * buf = &_words[0] + wordsSize; memset(buf, 0, 4); - memcpy(buf + 4, word.data(), len); - memset(buf + 4 + len, 0, fullyPaddedSize - unpadded_size + 1); + memcpy(buf + 4, word.data(), word.size()); + memset(buf + 4 + word.size(), 0, fullyPaddedSize - unpadded_size + 1); uint32_t wordRef = (wordsSize + 4) >> 2; // assert(wordRef != 0); @@ -278,20 +159,10 @@ FieldInverter::saveWord(const vespalib::stringref word, const Document* doc) return wordRef; } -uint32_t -FieldInverter::saveWord(const document::FieldValue &fv, const Document& doc) -{ - assert(fv.isA(FieldValue::Type::STRING)); - using RawRef = std::pair<const char*, size_t>; - RawRef sRef = fv.getAsRaw(); - return saveWord(vespalib::stringref(sRef.first, sRef.second), &doc); -} - void FieldInverter::remove(const vespalib::stringref word, uint32_t docId) { - uint32_t wordRef = saveWord(word, nullptr); - assert(wordRef != 0); + uint32_t wordRef = saveWord(word); _positions.emplace_back(wordRef, docId); } @@ -319,6 +190,17 @@ FieldInverter::endDoc() } void +FieldInverter::addWord(vespalib::stringref word, const document::Document& doc) +{ + word = _token_extractor.sanitize_word(word, &doc); + if (!word.empty()) { + uint32_t wordRef = saveWord(word); + add(wordRef); + stepWordPos(); + } +} + +void FieldInverter::processNormalDocTextField(const StringFieldValue &field, const Document& doc) { startElement(1); @@ -367,6 +249,7 @@ FieldInverter::FieldInverter(const Schema &schema, uint32_t fieldId, _docId(0), _oldPosSize(0), _schema(schema), + _token_extractor(_schema.getIndexField(_fieldId).getName(), max_word_len), _words(), _elems(), _positions(), diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h index 2178efc31bf..4e3934ba322 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h @@ -1,10 +1,11 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once #include "i_field_index_remove_listener.h" #include <vespa/document/annotation/span.h> #include <vespa/searchlib/index/docidandfeatures.h> +#include <vespa/searchlib/util/token_extractor.h> #include <vespa/vespalib/stllike/allocator.h> #include <vespa/vespalib/stllike/hash_map.h> #include <limits> @@ -172,6 +173,7 @@ private: uint32_t _oldPosSize; const index::Schema &_schema; + linguistics::TokenExtractor _token_extractor; WordBuffer _words; ElemInfoVec _elems; @@ -179,9 +181,8 @@ private: index::DocIdAndPosOccFeatures _features; UInt32Vector _wordRefs; - using SpanTerm = std::pair<document::Span, const document::FieldValue *>; - using SpanTermVector = std::vector<SpanTerm>; - SpanTermVector _terms; + using SpanTerm = linguistics::TokenExtractor::SpanTerm; + std::vector<SpanTerm> _terms; // Info about aborted and pending documents. std::vector<PositionRange> _abortedDocs; @@ -202,12 +203,7 @@ private: /** * Save the given word in the word buffer and return the word reference. */ - VESPA_DLL_LOCAL uint32_t saveWord(const vespalib::stringref word, const document::Document* doc); - - /** - * Save the field value as a word in the word buffer and return the word reference. - */ - VESPA_DLL_LOCAL uint32_t saveWord(const document::FieldValue &fv, const document::Document& doc); + VESPA_DLL_LOCAL uint32_t saveWord(vespalib::stringref word); /** * Get pointer to saved word from a word reference. @@ -326,13 +322,7 @@ public: void endDoc(); - void addWord(const vespalib::stringref word, const document::Document& doc) { - uint32_t wordRef = saveWord(word, &doc); - if (wordRef != 0u) { - add(wordRef); - stepWordPos(); - } - } + void addWord(vespalib::stringref word, const document::Document& doc); }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h index 420037fa72b..ee075290dc9 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_collection.h index bdfcdedbbf0..afd407f414e 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/i_field_index_collection.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_collection.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index_insert_listener.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_insert_listener.h index cf9dcee2f57..b72b8d271fe 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/i_field_index_insert_listener.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_insert_listener.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once #include <vespa/vespalib/datastore/entryref.h> diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index_remove_listener.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_remove_listener.h index de03e5751c9..5257a1dba3d 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/i_field_index_remove_listener.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_remove_listener.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h index 551f15a5d76..9a27588cedb 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp index 1e6506bc8d5..262994222a3 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "invert_context.h" #include "document_inverter_context.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_context.h b/searchlib/src/vespa/searchlib/memoryindex/invert_context.h index 059fdb25d06..2ef6e07b833 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/invert_context.h +++ b/searchlib/src/vespa/searchlib/memoryindex/invert_context.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp b/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp index 13fb1d726b4..d0f63ac3cd6 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "invert_task.h" #include "document_inverter_context.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_task.h b/searchlib/src/vespa/searchlib/memoryindex/invert_task.h index a351fd2a10f..840d0d0daa7 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/invert_task.h +++ b/searchlib/src/vespa/searchlib/memoryindex/invert_task.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp index 86421711e32..09a608f424f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "memory_index.h" #include "document_inverter.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h index 320c6fba277..b04274e52c7 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp index 1f2f660b0e6..1c093a9cd15 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "ordered_field_index_inserter.h" #include "i_field_index_insert_listener.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h index ed4c6d68b5f..cded212b0f4 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp index 48fc6873390..a1a45d479a0 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "posting_iterator.h" #include <vespa/searchlib/queryeval/iterators.h> diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.h b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.h index 790f8bb3db7..30390ca76d4 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.h +++ b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h index a8cc7fce1f2..0ad9331762f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h +++ b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. # pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/push_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/push_context.cpp index 5a4a773a6f5..7df17d816cb 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/push_context.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/push_context.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "push_context.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/push_context.h b/searchlib/src/vespa/searchlib/memoryindex/push_context.h index 0e96346837e..3198afa1dbe 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/push_context.h +++ b/searchlib/src/vespa/searchlib/memoryindex/push_context.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/push_task.cpp b/searchlib/src/vespa/searchlib/memoryindex/push_task.cpp index b68e23bfe02..52f9e59b76e 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/push_task.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/push_task.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "push_task.h" #include "push_context.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/push_task.h b/searchlib/src/vespa/searchlib/memoryindex/push_task.h index 002b9334b78..10e8dd32410 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/push_task.h +++ b/searchlib/src/vespa/searchlib/memoryindex/push_task.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/remove_task.cpp b/searchlib/src/vespa/searchlib/memoryindex/remove_task.cpp index d19abd50274..3ff80c11fbe 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/remove_task.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/remove_task.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "remove_task.h" #include "document_inverter_context.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/remove_task.h b/searchlib/src/vespa/searchlib/memoryindex/remove_task.h index 5eba4390752..3d96f0f6e70 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/remove_task.h +++ b/searchlib/src/vespa/searchlib/memoryindex/remove_task.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp index 32a2ab733fd..dc2ebd5bd60 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "url_field_inverter.h" #include "field_inverter.h" diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h index 45247c630e6..fd776b92d76 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once diff --git a/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp index e330dc83055..cc0591bf4ab 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "word_store.h" #include <vespa/vespalib/datastore/datastore.hpp> diff --git a/searchlib/src/vespa/searchlib/memoryindex/word_store.h b/searchlib/src/vespa/searchlib/memoryindex/word_store.h index 896bbf5d75e..a282f43813f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/word_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/word_store.h @@ -1,4 +1,4 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once |