summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@broadpark.no> 2019-08-28 13:56:12 +0200
committer: Tor Egge <Tor.Egge@broadpark.no> 2019-08-28 14:19:54 +0200
commit: 1142904edeff52097156b05098d305ece5ba9c51 (patch)
tree: 1349417755040f8de67371c444367966cd697fda /searchlib
parent: bae1efe6ef89764dc6340bdd70999e070cac0f89 (diff)
Clean up leftovers for loading string attribute vectors from non-enumerated files.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumattribute.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumattribute.hpp 40
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp 45
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h 95
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp 26
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/no_loaded_vector.h 15
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp 108
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistattribute.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp 14
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.h 5
12 files changed, 117 insertions, 235 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index ea33a4d552c..c190f8af023 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -59,7 +59,6 @@ vespa_add_library(searchlib_attribute OBJECT
load_utils.cpp
loadedenumvalue.cpp
loadednumericvalue.cpp
- loadedstringvalue.cpp
loadedvalue.cpp
multi_value_mapping.cpp
multi_value_mapping_base.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
index 0bc3d717509..55af5a874f9 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
@@ -5,6 +5,7 @@
#include "attributevector.h"
#include "loadedenumvalue.h"
#include "enumstore.h"
+#include "no_loaded_vector.h"
#include <set>
namespace search {
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
index 1268e7d3118..a5ba60cad4d 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
@@ -26,28 +26,30 @@ EnumAttribute<B>::~EnumAttribute()
template <typename B>
void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
{
- typename EnumStore::Builder builder;
- if (!loaded.empty()) {
- auto value = loaded.read();
- LoadedValueType prev = value.getValue();
- uint32_t prevRefCount(0);
- EnumIndex index = builder.insert(value.getValue(), value._pidx.ref());
- for (size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) {
- value = loaded.read();
- if (EnumStore::ComparatorType::compare(prev, value.getValue()) != 0) {
- builder.updateRefCount(prevRefCount);
- index = builder.insert(value.getValue(), value._pidx.ref());
- prev = value.getValue();
- prevRefCount = 1;
- } else {
- prevRefCount++;
+ if constexpr(!std::is_same_v<LoadedVector, NoLoadedVector>) {
+ typename EnumStore::Builder builder;
+ if (!loaded.empty()) {
+ auto value = loaded.read();
+ LoadedValueType prev = value.getValue();
+ uint32_t prevRefCount(0);
+ EnumIndex index = builder.insert(value.getValue(), value._pidx.ref());
+ for (size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) {
+ value = loaded.read();
+ if (EnumStore::ComparatorType::compare(prev, value.getValue()) != 0) {
+ builder.updateRefCount(prevRefCount);
+ index = builder.insert(value.getValue(), value._pidx.ref());
+ prev = value.getValue();
+ prevRefCount = 1;
+ } else {
+ prevRefCount++;
+ }
+ value.setEidx(index);
+ loaded.write(value);
}
- value.setEidx(index);
- loaded.write(value);
+ builder.updateRefCount(prevRefCount);
}
- builder.updateRefCount(prevRefCount);
+ _enumStore.reset(builder);
}
- _enumStore.reset(builder);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp
deleted file mode 100644
index 83515d5d331..00000000000
--- a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "loadedstringvalue.h"
-
-using vespalib::Array;
-using vespalib::alloc::Alloc;
-
-namespace search {
-namespace attribute {
-
-void
-sortLoadedByValue(LoadedStringVectorReal &loaded)
-{
- Array<unsigned> radixScratchPad(loaded.size(), Alloc::allocMMap());
- for(size_t i(0), m(loaded.size()); i < m; i++) {
- loaded[i].prepareRadixSort();
- }
- radix_sort(LoadedStringValue::ValueRadix(),
- LoadedStringValue::ValueCompare(),
- AlwaysEof<LoadedStringValue>(),
- 1,
- &loaded[0],
- loaded.size(),
- &radixScratchPad[0],
- 0,
- 96);
-}
-
-void
-sortLoadedByDocId(LoadedStringVectorReal &loaded)
-{
- ShiftBasedRadixSorter<LoadedStringValue,
- LoadedStringValue::DocRadix,
- LoadedStringValue::DocOrderCompare, 56>::
- radix_sort(LoadedStringValue::DocRadix(),
- LoadedStringValue::DocOrderCompare(),
- &loaded[0],
- loaded.size(),
- 16);
-}
-
-
-} // namespace attribute
-} // namespace search
-
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h
deleted file mode 100644
index 6b4a93176f7..00000000000
--- a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/searchlib/common/sort.h>
-#include <vespa/searchlib/util/fileutil.h>
-#include <vespa/searchlib/util/foldedstringcompare.h>
-#include <vespa/vespalib/text/utf8.h>
-#include <vespa/vespalib/text/lowercase.h>
-#include "loadedvalue.h"
-
-namespace search
-{
-
-namespace attribute
-{
-
-/**
- * Temporary representation of enumerated attribute loaded from non-enumerated
- * save file (i.e. old save format). For string data types.
- */
-
-template <typename B>
-struct RadixSortable : public B
-{
- RadixSortable()
- : B(),
- _currRadix(NULL),
- _currRadixFolding(false)
- {
- }
-
- class ValueRadix
- {
- public:
- uint32_t
- operator ()(RadixSortable &x) const
- {
- vespalib::Utf8ReaderForZTS u8reader(x._currRadix);
- uint32_t val = u8reader.getChar();
- if (x._currRadixFolding) {
- if (val != 0) {
- val = vespalib::LowerCase::convert(val);
- } else {
- // switch to returning unfolded values
- x._currRadix = x.getValue();
- x._currRadixFolding = false;
- val = 1;
- }
- }
- return val;
- }
- };
-
- class ValueCompare : public std::binary_function<B, B, bool>
- {
- FoldedStringCompare _compareHelper;
- public:
- bool
- operator()(const B &x, const B &y) const
- {
- return _compareHelper.compare(x.getValue(), y.getValue()) < 0;
- }
- };
-
- void
- prepareRadixSort()
- {
- _currRadix = this->getValue();
- _currRadixFolding = true;
- }
-private:
- const char * _currRadix;
- bool _currRadixFolding;
-};
-
-typedef RadixSortable<LoadedValue<const char *> > LoadedStringValue;
-
-typedef SequentialReadModifyWriteInterface<LoadedStringValue> LoadedStringVector;
-
-typedef SequentialReadModifyWriteVector<LoadedStringValue>
-LoadedStringVectorReal;
-
-
-void
-sortLoadedByValue(LoadedStringVectorReal &loaded);
-
-void
-sortLoadedByDocId(LoadedStringVectorReal &loaded);
-
-
-} // namespace attribute
-
-} // namespace search
-
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
index 31cc0fa2d12..48c3649a9b2 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
@@ -6,6 +6,7 @@
#include "enumstorebase.h"
#include "loadedenumvalue.h"
#include "multivalue.h"
+#include "no_loaded_vector.h"
namespace search {
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index 3447ab6d168..5352dc492fd 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -84,20 +84,22 @@ template <typename B, typename M>
void
MultiValueEnumAttribute<B, M>::fillValues(LoadedVector & loaded)
{
- uint32_t numDocs(this->getNumDocs());
- size_t numValues = loaded.size();
- size_t count = 0;
- WeightedIndexVector indices;
- this->_mvMapping.prepareLoadFromMultiValue();
- for (DocId doc = 0; doc < numDocs; ++doc) {
- for(const auto* v = & loaded.read();(count < numValues) && (v->_docId == doc); count++, loaded.next(), v = & loaded.read()) {
- indices.push_back(WeightedIndex(v->getEidx(), v->getWeight()));
+ if constexpr(!std::is_same_v<LoadedVector, NoLoadedVector>) {
+ uint32_t numDocs(this->getNumDocs());
+ size_t numValues = loaded.size();
+ size_t count = 0;
+ WeightedIndexVector indices;
+ this->_mvMapping.prepareLoadFromMultiValue();
+ for (DocId doc = 0; doc < numDocs; ++doc) {
+ for(const auto* v = & loaded.read();(count < numValues) && (v->_docId == doc); count++, loaded.next(), v = & loaded.read()) {
+ indices.push_back(WeightedIndex(v->getEidx(), v->getWeight()));
+ }
+ this->checkSetMaxValueCount(indices.size());
+ this->_mvMapping.set(doc, indices);
+ indices.clear();
}
- this->checkSetMaxValueCount(indices.size());
- this->_mvMapping.set(doc, indices);
- indices.clear();
+ this->_mvMapping.doneLoadFromMultiValue();
}
- this->_mvMapping.doneLoadFromMultiValue();
}
diff --git a/searchlib/src/vespa/searchlib/attribute/no_loaded_vector.h b/searchlib/src/vespa/searchlib/attribute/no_loaded_vector.h
new file mode 100644
index 00000000000..a3be87181bb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/no_loaded_vector.h
@@ -0,0 +1,15 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+
+/*
+ * Tag class, used to stub out code for loading enumerated attributes
+ * from non-enumerated files for data types where enumeration is
+ * mandatory.
+ */
+class NoLoadedVector {
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
index 29cb36c8e88..800e621045a 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
@@ -220,61 +220,61 @@ void
PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>::
handleFillPostings(LoadedVector &loaded)
{
- clearAllPostings();
- EntryRef newIndex;
- PostingChange<P> postings;
- uint32_t docIdLimit = _attr.getNumDocs();
- _postingList.resizeBitVectors(docIdLimit, docIdLimit);
- if ( ! loaded.empty() ) {
- vespalib::Array<typename LoadedVector::Type> similarValues;
- auto value = loaded.read();
- LoadedValueType prev = value.getValue();
- for (size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) {
- value = loaded.read();
- if (FoldedComparatorType::compareFolded(prev, value.getValue()) == 0) {
- // for single value attributes loaded[numDocs] is used
- // for default value but we don't want to add an
- // invalid docId to the posting list.
- if (value._docId < docIdLimit) {
- postings.add(value._docId, value.getWeight());
+ if constexpr (!std::is_same_v<LoadedVector, NoLoadedVector>) {
+ clearAllPostings();
+ EntryRef newIndex;
+ PostingChange<P> postings;
+ uint32_t docIdLimit = _attr.getNumDocs();
+ _postingList.resizeBitVectors(docIdLimit, docIdLimit);
+ if ( ! loaded.empty() ) {
+ vespalib::Array<typename LoadedVector::Type> similarValues;
+ auto value = loaded.read();
+ LoadedValueType prev = value.getValue();
+ for (size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) {
+ value = loaded.read();
+ if (FoldedComparatorType::compareFolded(prev, value.getValue()) == 0) {
+ // for single value attributes loaded[numDocs] is used
+ // for default value but we don't want to add an
+ // invalid docId to the posting list.
+ if (value._docId < docIdLimit) {
+ postings.add(value._docId, value.getWeight());
+ similarValues.push_back(value);
+ }
+ } else {
+ postings.removeDups();
+ newIndex = EntryRef();
+ _postingList.apply(newIndex,
+ &postings._additions[0],
+ &postings._additions[0] +
+ postings._additions.size(),
+ &postings._removals[0],
+ &postings._removals[0] +
+ postings._removals.size());
+ postings.clear();
+ if (value._docId < docIdLimit) {
+ postings.add(value._docId, value.getWeight());
+ }
+ similarValues[0]._pidx = newIndex;
+ for (size_t j(0), k(similarValues.size()); j < k; j++) {
+ loaded.write(similarValues[j]);
+ }
+ similarValues.clear();
similarValues.push_back(value);
+ prev = value.getValue();
}
- } else {
- postings.removeDups();
-
- newIndex = EntryRef();
- _postingList.apply(newIndex,
- &postings._additions[0],
- &postings._additions[0] +
- postings._additions.size(),
- &postings._removals[0],
- &postings._removals[0] +
- postings._removals.size());
- postings.clear();
- if (value._docId < docIdLimit) {
- postings.add(value._docId, value.getWeight());
- }
- similarValues[0]._pidx = newIndex;
- for (size_t j(0), k(similarValues.size()); j < k; j++) {
- loaded.write(similarValues[j]);
- }
- similarValues.clear();
- similarValues.push_back(value);
- prev = value.getValue();
}
- }
-
- postings.removeDups();
- newIndex = EntryRef();
- _postingList.apply(newIndex,
- &postings._additions[0],
- &postings._additions[0] +
- postings._additions.size(),
- &postings._removals[0],
- &postings._removals[0] + postings._removals.size());
- similarValues[0]._pidx = newIndex;
- for (size_t i(0), m(similarValues.size()); i < m; i++) {
- loaded.write(similarValues[i]);
+ postings.removeDups();
+ newIndex = EntryRef();
+ _postingList.apply(newIndex,
+ &postings._additions[0],
+ &postings._additions[0] +
+ postings._additions.size(),
+ &postings._removals[0],
+ &postings._removals[0] + postings._removals.size());
+ similarValues[0]._pidx = newIndex;
+ for (size_t i(0), m(similarValues.size()); i < m; i++) {
+ loaded.write(similarValues[i]);
+ }
}
}
}
@@ -359,7 +359,7 @@ PostingListAttributeSubBase<AttributePosting,
template class
PostingListAttributeSubBase<AttributePosting,
- attribute::LoadedStringVector,
+ NoLoadedVector,
const char *,
EnumStoreT<StringEntryType > >;
@@ -401,7 +401,7 @@ PostingListAttributeSubBase<AttributeWeightPosting,
template class
PostingListAttributeSubBase<AttributeWeightPosting,
- attribute::LoadedStringVector,
+ NoLoadedVector,
const char *,
EnumStoreT<StringEntryType > >;
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h
index 06c96e07a93..50e97e703c3 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h
@@ -13,6 +13,7 @@
#include <vespa/vespalib/btree/btreestore.h>
#include <vespa/vespalib/datastore/entry_comparator.h>
#include <vespa/vespalib/datastore/entryref.h>
+#include <map>
namespace search {
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 60a3a74b873..08095b6bf13 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -204,12 +204,14 @@ template <typename B>
void
SingleValueEnumAttribute<B>::fillValues(LoadedVector & loaded)
{
- uint32_t numDocs = this->getNumDocs();
- getGenerationHolder().clearHoldLists();
- _enumIndices.reset();
- _enumIndices.unsafe_reserve(numDocs);
- for (DocId doc = 0; doc < numDocs; ++doc, loaded.next()) {
- _enumIndices.push_back(loaded.read().getEidx());
+ if constexpr (!std::is_same_v<LoadedVector, NoLoadedVector>) {
+ uint32_t numDocs = this->getNumDocs();
+ getGenerationHolder().clearHoldLists();
+ _enumIndices.reset();
+ _enumIndices.unsafe_reserve(numDocs);
+ for (DocId doc = 0; doc < numDocs; ++doc, loaded.next()) {
+ _enumIndices.push_back(loaded.read().getEidx());
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index 21d36e66cf0..9e441aa2e0c 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -9,7 +9,7 @@
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/searchlib/attribute/enumstorebase.h>
#include <vespa/searchlib/attribute/loadedenumvalue.h>
-#include <vespa/searchlib/attribute/loadedstringvalue.h>
+#include "no_loaded_vector.h"
#include <vespa/searchlib/attribute/changevector.h>
namespace search {
@@ -25,7 +25,7 @@ public:
typedef IEnumStore::Index EnumIndex;
typedef IEnumStore::IndexVector EnumIndexVector;
typedef IEnumStore::EnumVector EnumVector;
- typedef attribute::LoadedStringVector LoadedVector;
+ using LoadedVector = NoLoadedVector;
public:
DECLARE_IDENTIFIABLE_ABSTRACT(StringAttribute);
bool append(DocId doc, const vespalib::string & v, int32_t weight) {
@@ -73,7 +73,6 @@ protected:
virtual vespalib::MemoryUsage getChangeVectorMemoryUsage() const override;
private:
- typedef attribute::LoadedStringVectorReal LoadedVectorR;
virtual void fillPostings(LoadedVector & loaded);
virtual void fillEnum(LoadedVector & loaded);
virtual void fillValues(LoadedVector & loaded);