From 32c43b76fcdb4f7226d2dffd55ea1334513b065b Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Thu, 11 Apr 2019 09:04:29 +0000 Subject: Rename featurestore* -> feature_store*. --- .../src/tests/memoryindex/datastore/.gitignore | 6 +- .../src/tests/memoryindex/datastore/CMakeLists.txt | 6 +- .../memoryindex/datastore/feature_store_test.cpp | 232 +++++++++++++++++++++ .../memoryindex/datastore/featurestore_test.cpp | 232 --------------------- .../src/vespa/searchlib/memoryindex/CMakeLists.txt | 2 +- .../vespa/searchlib/memoryindex/feature_store.cpp | 136 ++++++++++++ .../vespa/searchlib/memoryindex/feature_store.h | 203 ++++++++++++++++++ .../vespa/searchlib/memoryindex/featurestore.cpp | 136 ------------ .../src/vespa/searchlib/memoryindex/featurestore.h | 203 ------------------ .../src/vespa/searchlib/memoryindex/field_index.h | 2 +- .../vespa/searchlib/test/fakedata/fakememtreeocc.h | 2 +- 11 files changed, 579 insertions(+), 581 deletions(-) create mode 100644 searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp delete mode 100644 searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/feature_store.h delete mode 100644 searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp delete mode 100644 searchlib/src/vespa/searchlib/memoryindex/featurestore.h (limited to 'searchlib') diff --git a/searchlib/src/tests/memoryindex/datastore/.gitignore b/searchlib/src/tests/memoryindex/datastore/.gitignore index 98f4acc70a8..4b4a9e38d84 100644 --- a/searchlib/src/tests/memoryindex/datastore/.gitignore +++ b/searchlib/src/tests/memoryindex/datastore/.gitignore @@ -1,8 +1,6 @@ .depend Makefile -datastore_test -featurestore_test +feature_store_test wordstore_test -searchlib_datastore_test_app -searchlib_featurestore_test_app +searchlib_feature_store_test_app searchlib_wordstore_test_app diff --git a/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt index 2ba0f2eac63..fe613e425be 100644 --- a/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt +++ b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt @@ -1,11 +1,11 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_featurestore_test_app TEST +vespa_add_executable(searchlib_feature_store_test_app TEST SOURCES - featurestore_test.cpp + feature_store_test.cpp DEPENDS searchlib ) -vespa_add_test(NAME searchlib_featurestore_test_app COMMAND searchlib_featurestore_test_app) +vespa_add_test(NAME searchlib_feature_store_test_app COMMAND searchlib_feature_store_test_app) vespa_add_executable(searchlib_wordstore_test_app TEST SOURCES wordstore_test.cpp diff --git a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp new file mode 100644 index 00000000000..49e9d613861 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp @@ -0,0 +1,232 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +LOG_SETUP("feature_store_test"); +#include +#include + +using namespace search::btree; +using namespace search::datastore; +using namespace search::index; + +using search::index::schema::CollectionType; +using search::index::schema::DataType; + +namespace search +{ + + +namespace memoryindex +{ + + +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + + const Schema & getSchema() const { return _schema; } + bool assertFeatures(const DocIdAndFeatures &exp, const DocIdAndFeatures &act); + void requireThatFeaturesCanBeAddedAndRetrieved(); + void requireThatNextWordsAreWorking(); + void requireThatAddFeaturesTriggersChangeOfBuffer(); + +public: + Test(); + int Main() override; +}; + + +bool +Test::assertFeatures(const DocIdAndFeatures &exp, + const DocIdAndFeatures &act) +{ + // docid is not encoded as part of features + if (!EXPECT_EQUAL(exp._elements.size(), + act._elements.size())) + return false; + for (size_t i = 0; i < exp._elements.size(); ++i) { + if (!EXPECT_EQUAL(exp._elements[i]._elementId, + act._elements[i]._elementId)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._numOccs, + act._elements[i]._numOccs)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._elementLen, + act._elements[i]._elementLen)) + return false; + } + if (!EXPECT_EQUAL(exp._wordPositions.size(), act._wordPositions.size())) + return false; + for (size_t i = 0; i < exp._wordPositions.size(); ++i) { + if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos, + act._wordPositions[i]._wordPos)) return false; + } + return true; +} + + +DocIdAndFeatures +getFeatures(uint32_t numOccs, + int32_t weight, + uint32_t elemLen) +{ + DocIdAndFeatures f; + f._docId = 0; + f._elements.push_back(WordDocElementFeatures(0)); + f._elements.back().setNumOccs(numOccs); + f._elements.back().setWeight(weight); + f._elements.back().setElementLen(elemLen); + for (uint32_t i = 0; i < numOccs; ++i) { + f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + } + return f; +} + + +void +Test::requireThatFeaturesCanBeAddedAndRetrieved() +{ + FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatNextWordsAreWorking() +{ + FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatAddFeaturesTriggersChangeOfBuffer() +{ + FeatureStore fs(getSchema()); + size_t cnt = 1; + DocIdAndFeatures act; + uint32_t lastId = 0; + for (;;++cnt) { + uint32_t numOccs = (cnt % 100) + 1; + DocIdAndFeatures f = getFeatures(numOccs, 1, numOccs + 1); + std::pair r = fs.addFeatures(0, f); + fs.getFeatures(0, r.first, act); + EXPECT_TRUE(assertFeatures(f, act)); + uint32_t bufferId = FeatureStore::RefType(r.first).bufferId(); + if (bufferId > lastId) { + LOG(info, + "Changed to bufferId %u after %zu feature sets", + bufferId, cnt); + lastId = bufferId; + } + if (bufferId == 1) { + break; + } + } + EXPECT_EQUAL(1u, lastId); + LOG(info, "Added %zu feature sets in 1 buffer", cnt); +} + + +Test::Test() + : _schema() +{ + _schema.addIndexField(Schema::IndexField("f0", DataType::STRING)); + _schema.addIndexField(Schema::IndexField("f1", DataType::STRING, CollectionType::WEIGHTEDSET)); +} + + +int +Test::Main() +{ + TEST_INIT("feature_store_test"); + + requireThatFeaturesCanBeAddedAndRetrieved(); + requireThatNextWordsAreWorking(); + requireThatAddFeaturesTriggersChangeOfBuffer(); + + TEST_DONE(); +} + + +} + + +} + + +TEST_APPHOOK(search::memoryindex::Test); diff --git a/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp deleted file mode 100644 index dc061f55732..00000000000 --- a/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include -LOG_SETUP("featurestore_test"); -#include -#include - -using namespace search::btree; -using namespace search::datastore; -using namespace search::index; - -using search::index::schema::CollectionType; -using search::index::schema::DataType; - -namespace search -{ - - -namespace memoryindex -{ - - -class Test : public vespalib::TestApp -{ -private: - Schema _schema; - - const Schema & getSchema() const { return _schema; } - bool assertFeatures(const DocIdAndFeatures &exp, const DocIdAndFeatures &act); - void requireThatFeaturesCanBeAddedAndRetrieved(); - void requireThatNextWordsAreWorking(); - void requireThatAddFeaturesTriggersChangeOfBuffer(); - -public: - Test(); - int Main() override; -}; - - -bool -Test::assertFeatures(const DocIdAndFeatures &exp, - const DocIdAndFeatures &act) -{ - // docid is not encoded as part of features - if (!EXPECT_EQUAL(exp._elements.size(), - act._elements.size())) - return false; - for (size_t i = 0; i < exp._elements.size(); ++i) { - if (!EXPECT_EQUAL(exp._elements[i]._elementId, - act._elements[i]._elementId)) - return false; - if (!EXPECT_EQUAL(exp._elements[i]._numOccs, - act._elements[i]._numOccs)) - return false; - if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight)) - return false; - if (!EXPECT_EQUAL(exp._elements[i]._elementLen, - act._elements[i]._elementLen)) - return false; - } - if (!EXPECT_EQUAL(exp._wordPositions.size(), act._wordPositions.size())) - return false; - for (size_t i = 0; i < exp._wordPositions.size(); ++i) { - if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos, - act._wordPositions[i]._wordPos)) return false; - } - return true; -} - - -DocIdAndFeatures -getFeatures(uint32_t numOccs, - int32_t weight, - uint32_t elemLen) -{ - DocIdAndFeatures f; - f._docId = 0; - f._elements.push_back(WordDocElementFeatures(0)); - f._elements.back().setNumOccs(numOccs); - f._elements.back().setWeight(weight); - f._elements.back().setElementLen(elemLen); - for (uint32_t i = 0; i < numOccs; ++i) { - f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); - } - return f; -} - - -void -Test::requireThatFeaturesCanBeAddedAndRetrieved() -{ - FeatureStore fs(getSchema()); - DocIdAndFeatures act; - EntryRef r1; - EntryRef r2; - std::pair r; - { - DocIdAndFeatures f = getFeatures(2, 4, 8); - r = fs.addFeatures(0, f); - r1 = r.first; - EXPECT_TRUE(r.second > 0); - EXPECT_EQUAL(FeatureStore::RefType::align(1u), - FeatureStore::RefType(r1).offset()); - EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); - LOG(info, - "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", - r.second, - FeatureStore::RefType(r1).offset(), - FeatureStore::RefType(r1).bufferId()); - fs.getFeatures(0, r1, act); - // weight not encoded for single value - EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); - } - { - DocIdAndFeatures f = getFeatures(4, 8, 16); - r = fs.addFeatures(1, f); - r2 = r.first; - EXPECT_TRUE(r.second > 0); - EXPECT_TRUE(FeatureStore::RefType(r2).offset() > - FeatureStore::RefType(r1).offset()); - EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); - LOG(info, - "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", - r.second, - FeatureStore::RefType(r2).offset(), - FeatureStore::RefType(r2).bufferId()); - fs.getFeatures(1, r2, act); - EXPECT_TRUE(assertFeatures(f, act)); - } -} - - -void -Test::requireThatNextWordsAreWorking() -{ - FeatureStore fs(getSchema()); - DocIdAndFeatures act; - EntryRef r1; - EntryRef r2; - std::pair r; - { - DocIdAndFeatures f = getFeatures(2, 4, 8); - r = fs.addFeatures(0, f); - r1 = r.first; - EXPECT_TRUE(r.second > 0); - EXPECT_EQUAL(FeatureStore::RefType::align(1u), - FeatureStore::RefType(r1).offset()); - EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); - LOG(info, - "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", - r.second, - FeatureStore::RefType(r1).offset(), - FeatureStore::RefType(r1).bufferId()); - fs.getFeatures(0, r1, act); - // weight not encoded for single value - EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); - } - { - DocIdAndFeatures f = getFeatures(4, 8, 16); - r = fs.addFeatures(1, f); - r2 = r.first; - EXPECT_TRUE(r.second > 0); - EXPECT_TRUE(FeatureStore::RefType(r2).offset() > - FeatureStore::RefType(r1).offset()); - EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); - LOG(info, - "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", - r.second, - FeatureStore::RefType(r2).offset(), - FeatureStore::RefType(r2).bufferId()); - fs.getFeatures(1, r2, act); - EXPECT_TRUE(assertFeatures(f, act)); - } -} - - -void -Test::requireThatAddFeaturesTriggersChangeOfBuffer() -{ - FeatureStore fs(getSchema()); - size_t cnt = 1; - DocIdAndFeatures act; - uint32_t lastId = 0; - for (;;++cnt) { - uint32_t numOccs = (cnt % 100) + 1; - DocIdAndFeatures f = getFeatures(numOccs, 1, numOccs + 1); - std::pair r = fs.addFeatures(0, f); - fs.getFeatures(0, r.first, act); - EXPECT_TRUE(assertFeatures(f, act)); - uint32_t bufferId = FeatureStore::RefType(r.first).bufferId(); - if (bufferId > lastId) { - LOG(info, - "Changed to bufferId %u after %zu feature sets", - bufferId, cnt); - lastId = bufferId; - } - if (bufferId == 1) { - break; - } - } - EXPECT_EQUAL(1u, lastId); - LOG(info, "Added %zu feature sets in 1 buffer", cnt); -} - - -Test::Test() - : _schema() -{ - _schema.addIndexField(Schema::IndexField("f0", DataType::STRING)); - _schema.addIndexField(Schema::IndexField("f1", DataType::STRING, CollectionType::WEIGHTEDSET)); -} - - -int -Test::Main() -{ - TEST_INIT("featurestore_test"); - - requireThatFeaturesCanBeAddedAndRetrieved(); - requireThatNextWordsAreWorking(); - requireThatAddFeaturesTriggersChangeOfBuffer(); - - TEST_DONE(); -} - - -} - - -} - - -TEST_APPHOOK(search::memoryindex::Test); diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt index 7460fc1befe..eb09d228cce 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt @@ -3,7 +3,7 @@ vespa_add_library(searchlib_memoryindex OBJECT SOURCES compact_document_words_store.cpp document_inverter.cpp - featurestore.cpp + feature_store.cpp field_index.cpp field_index_collection.cpp field_index_remover.cpp diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp new file mode 100644 index 00000000000..974fcc01c36 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp @@ -0,0 +1,136 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "feature_store.h" +#include +#include + +namespace search::memoryindex { + +constexpr size_t MIN_BUFFER_ARRAYS = 1024u; + +using index::SchemaUtil; + +uint64_t +FeatureStore::writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &features) +{ + _f._fieldsParams = &_fieldsParams[packedIndex]; + uint64_t oldOffset = _f.getWriteOffset(); + assert((oldOffset & 63) == 0); + if (oldOffset > 2000) { + _f.setupWrite(_fctx); + oldOffset = 0; + assert(_f.getWriteOffset() == oldOffset); + } + assert(!features.getRaw()); + _f.writeFeatures(features); + return oldOffset; +} + +datastore::EntryRef +FeatureStore::addFeatures(const uint8_t *src, uint64_t byteLen) +{ + uint32_t pad = RefType::pad(byteLen); + auto result = _store.rawAllocator(_typeId).alloc(byteLen + pad, DECODE_SAFETY); + uint8_t *dst = result.data; + memcpy(dst, src, byteLen); + dst += byteLen; + if (pad > 0) { + memset(dst, 0, pad); + dst += pad; + } + memset(dst, 0, DECODE_SAFETY); + return result.ref; +} + +std::pair +FeatureStore::addFeatures(uint64_t beginOffset, uint64_t endOffset) +{ + uint64_t bitLen = (endOffset - beginOffset); + assert(static_cast(bitLen) > 0); + uint64_t wordLen = (bitLen + 63) / 64; + uint64_t byteLen = (bitLen + 7) / 8; + assert(wordLen > 0); + assert(byteLen > 0); + const uint8_t *src = reinterpret_cast(_f._valI - wordLen); + RefType ref = addFeatures(src, byteLen); + return std::make_pair(ref, bitLen); +} + +datastore::EntryRef +FeatureStore::moveFeatures(datastore::EntryRef ref, uint64_t bitLen) +{ + const uint8_t *src = getBits(ref); + uint64_t byteLen = (bitLen + 7) / 8; + RefType newRef = addFeatures(src, byteLen); + // Mark old features as dead + _store.incDead(ref, byteLen + RefType::pad(byteLen)); + return newRef; +} + +FeatureStore::FeatureStore(const Schema &schema) + : _store(), + _f(nullptr), + _fctx(_f), + _d(nullptr), + _fieldsParams(), + _schema(schema), + _type(RefType::align(1u), MIN_BUFFER_ARRAYS, + RefType::offsetSize() / RefType::align(1u)), + _typeId(0) +{ + _f.setWriteContext(&_fctx); + _fctx.allocComprBuf(64, 1); + _f.afterWrite(_fctx, 0, 0); + + _fieldsParams.resize(_schema.getNumIndexFields()); + SchemaUtil::IndexIterator it(_schema); + for (; it.isValid(); ++it) { + _fieldsParams[it.getIndex()].setSchemaParams(_schema, it.getIndex()); + } + _store.addType(&_type); + _store.initActiveBuffers(); +} + +FeatureStore::~FeatureStore() +{ + _store.dropBuffers(); +} + +std::pair +FeatureStore::addFeatures(uint32_t packedIndex, const DocIdAndFeatures &features) +{ + uint64_t oldOffset = writeFeatures(packedIndex, features); + uint64_t newOffset = _f.getWriteOffset(); + _f.flush(); + return addFeatures(oldOffset, newOffset); +} + +void +FeatureStore::getFeatures(uint32_t packedIndex, datastore::EntryRef ref, DocIdAndFeatures &features) +{ + setupForField(packedIndex, _d); + setupForReadFeatures(ref, _d); + _d.readFeatures(features); +} + +size_t +FeatureStore::bitSize(uint32_t packedIndex, datastore::EntryRef ref) +{ + setupForField(packedIndex, _d); + setupForUnpackFeatures(ref, _d); + uint64_t oldOffset = _d.getReadOffset(); + _d.skipFeatures(1); + uint64_t newOffset = _d.getReadOffset(); + uint64_t bitLen = (newOffset - oldOffset); + assert(static_cast(bitLen) > 0); + return bitLen; +} + +datastore::EntryRef +FeatureStore::moveFeatures(uint32_t packedIndex, datastore::EntryRef ref) +{ + uint64_t bitLen = bitSize(packedIndex, ref); + return moveFeatures(ref, bitLen); +} + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h new file mode 100644 index 00000000000..94d44eaf44d --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h @@ -0,0 +1,203 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search::memoryindex { + +class FeatureStore { +public: + using DataStoreType = datastore::DataStoreT>; + using RefType = DataStoreType::RefType; + using EncodeContext = bitcompression::EG2PosOccEncodeContext; + using DecodeContextCooked = bitcompression::EG2PosOccDecodeContextCooked; + using generation_t = vespalib::GenerationHandler::generation_t; + +private: + using Schema = index::Schema; + using DocIdAndFeatures = index::DocIdAndFeatures; + using PosOccFieldsParams = bitcompression::PosOccFieldsParams; + + static const uint32_t DECODE_SAFETY = 16; + + DataStoreType _store; + + // Feature Encoder + EncodeContext _f; + // Buffer for compressed features. + ComprFileWriteContext _fctx; + + // Feature Decoder + DecodeContextCooked _d; + + // Coding parameters for fields and field collections, derived + // from schema. + std::vector _fieldsParams; + + const Schema &_schema; + + datastore::BufferType _type; + const uint32_t _typeId; + + /** + * Writes the given features to the underlying encode context. + * + * @param packedIndex the field or field collection owning features + * @param features the features to be encoded + * @return the encode offset before writing + */ + uint64_t writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &features); + + /** + * Adds the features from the given buffer to the data store. + * + * @param src buffer with features + * @param byteLen the byte length of the buffer + * @return the entry ref for the added features + */ + datastore::EntryRef addFeatures(const uint8_t * src, uint64_t byteLen); + + /** + * Adds the features currently in the underlying encode context to the data store. + * + * @param beginOffset the begin offset into the encode context + * @param endOffset the end offset into the encode context + * @return the entry ref and bit length of the features + */ + std::pair addFeatures(uint64_t beginOffset, uint64_t endOffset); + + /** + * Moves features to new location, as part of compaction. + * + * @param ref old reference to stored features + * @param bitLen bit length of features to move + * @return new reference to stored features + */ + datastore::EntryRef moveFeatures(datastore::EntryRef ref, uint64_t bitLen); + +public: + + /** + * Constructor for feature store. + * + * @param schema The schema describing fields and field + * collections available, used to derive + * coding parameters. + */ + FeatureStore(const Schema &schema); + + ~FeatureStore(); + + /** + * Add features to feature store + * + * @param packedIndex The field or field collection owning features + * @param features The features to be encoded + * @return pair with reference to stored features and + * size of encoded features in bits + */ + std::pair addFeatures(uint32_t packedIndex, const DocIdAndFeatures &features); + + + /** + * Get features from feature store. Method signature is not + * const since feature decoder is written to during calculation. + * + * @param packedIndex The field or field collection owning features + * @param ref Reference to stored features + * @param features The features to be decoded + */ + void getFeatures(uint32_t packedIndex, datastore::EntryRef ref, DocIdAndFeatures &features); + + + /** + * Setup the given decoder to be used for the given field or field + * collection. + * + * @param packedIndex The field or field collection owning features + * @param decoder The feature decoder + */ + void setupForField(uint32_t packedIndex, DecodeContextCooked &decoder) const { + decoder._fieldsParams = &_fieldsParams[packedIndex]; + } + + /** + * Setup the given decoder to later use readFeatures() to decode + * the stored features. + * + * @param ref Reference to stored features + * @param decoder The feature decoder + */ + void setupForReadFeatures(datastore::EntryRef ref, DecodeContextCooked &decoder) const { + const uint8_t * bits = getBits(ref); + decoder.setByteCompr(bits); + uint32_t bufferId = RefType(ref).bufferId(); + const datastore::BufferState &state = _store.getBufferState(bufferId); + decoder.setEnd( + ((_store.getEntry(RefType(state.size(), bufferId)) - + bits) + 7) / 8, + false); + } + + /** + * Setup the given decoder to later use unpackFeatures() to decode + * the stored features. + * + * @param ref Reference to stored features + * @param decoder The feature decoder + */ + void setupForUnpackFeatures(datastore::EntryRef ref, DecodeContextCooked &decoder) const { + decoder.setByteCompr(getBits(ref)); + } + + /** + * Calculate size of encoded features. Method signature is not + * const since feature decoder is written to during calculation. + * + * @param packedIndex The field or field collection owning features + * @param ref Reference to stored features + * @return size of features in bits + */ + size_t bitSize(uint32_t packedIndex, datastore::EntryRef ref); + + /** + * Get byte address of stored features + * + * @param ref Referennce to stored features + * @return byte address of stored features + */ + const uint8_t *getBits(datastore::EntryRef ref) const { + RefType iRef(ref); + return _store.getEntry(iRef); + } + + /** + * Move features to new location, as part of compaction. + * + * @param packedIndex The field or field collection owning features + * @param ref Old reference to stored features + * @return New reference to stored features + */ + datastore::EntryRef moveFeatures(uint32_t packedIndex, datastore::EntryRef ref); + + /** + * Return a const view of the fields params used by this feature store. + * + * @return const view of fields params. + */ + const std::vector &getFieldsParams() const { return _fieldsParams; } + + void trimHoldLists(generation_t usedGen) { _store.trimHoldLists(usedGen); } + void transferHoldLists(generation_t generation) { _store.transferHoldLists(generation); } + void clearHoldLists() { _store.clearHoldLists();} + std::vector startCompact() { return _store.startCompact(_typeId); } + void finishCompact(const std::vector & toHold) { _store.finishCompact(toHold); } + MemoryUsage getMemoryUsage() const { return _store.getMemoryUsage(); } + datastore::DataStoreBase::MemStats getMemStats() const { return _store.getMemStats(); } +}; + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp b/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp deleted file mode 100644 index ae30af41d0a..00000000000 --- a/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "featurestore.h" -#include -#include - -namespace search::memoryindex { - -constexpr size_t MIN_BUFFER_ARRAYS = 1024u; - -using index::SchemaUtil; - -uint64_t -FeatureStore::writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &features) -{ - _f._fieldsParams = &_fieldsParams[packedIndex]; - uint64_t oldOffset = _f.getWriteOffset(); - assert((oldOffset & 63) == 0); - if (oldOffset > 2000) { - _f.setupWrite(_fctx); - oldOffset = 0; - assert(_f.getWriteOffset() == oldOffset); - } - assert(!features.getRaw()); - _f.writeFeatures(features); - return oldOffset; -} - -datastore::EntryRef -FeatureStore::addFeatures(const uint8_t *src, uint64_t byteLen) -{ - uint32_t pad = RefType::pad(byteLen); - auto result = _store.rawAllocator(_typeId).alloc(byteLen + pad, DECODE_SAFETY); - uint8_t *dst = result.data; - memcpy(dst, src, byteLen); - dst += byteLen; - if (pad > 0) { - memset(dst, 0, pad); - dst += pad; - } - memset(dst, 0, DECODE_SAFETY); - return result.ref; -} - -std::pair -FeatureStore::addFeatures(uint64_t beginOffset, uint64_t endOffset) -{ - uint64_t bitLen = (endOffset - beginOffset); - assert(static_cast(bitLen) > 0); - uint64_t wordLen = (bitLen + 63) / 64; - uint64_t byteLen = (bitLen + 7) / 8; - assert(wordLen > 0); - assert(byteLen > 0); - const uint8_t *src = reinterpret_cast(_f._valI - wordLen); - RefType ref = addFeatures(src, byteLen); - return std::make_pair(ref, bitLen); -} - -datastore::EntryRef -FeatureStore::moveFeatures(datastore::EntryRef ref, uint64_t bitLen) -{ - const uint8_t *src = getBits(ref); - uint64_t byteLen = (bitLen + 7) / 8; - RefType newRef = addFeatures(src, byteLen); - // Mark old features as dead - _store.incDead(ref, byteLen + RefType::pad(byteLen)); - return newRef; -} - -FeatureStore::FeatureStore(const Schema &schema) - : _store(), - _f(nullptr), - _fctx(_f), - _d(nullptr), - _fieldsParams(), - _schema(schema), - _type(RefType::align(1u), MIN_BUFFER_ARRAYS, - RefType::offsetSize() / RefType::align(1u)), - _typeId(0) -{ - _f.setWriteContext(&_fctx); - _fctx.allocComprBuf(64, 1); - _f.afterWrite(_fctx, 0, 0); - - _fieldsParams.resize(_schema.getNumIndexFields()); - SchemaUtil::IndexIterator it(_schema); - for (; it.isValid(); ++it) { - _fieldsParams[it.getIndex()].setSchemaParams(_schema, it.getIndex()); - } - _store.addType(&_type); - _store.initActiveBuffers(); -} - -FeatureStore::~FeatureStore() -{ - _store.dropBuffers(); -} - -std::pair -FeatureStore::addFeatures(uint32_t packedIndex, const DocIdAndFeatures &features) -{ - uint64_t oldOffset = writeFeatures(packedIndex, features); - uint64_t newOffset = _f.getWriteOffset(); - _f.flush(); - return addFeatures(oldOffset, newOffset); -} - -void -FeatureStore::getFeatures(uint32_t packedIndex, datastore::EntryRef ref, DocIdAndFeatures &features) -{ - setupForField(packedIndex, _d); - setupForReadFeatures(ref, _d); - _d.readFeatures(features); -} - -size_t -FeatureStore::bitSize(uint32_t packedIndex, datastore::EntryRef ref) -{ - setupForField(packedIndex, _d); - setupForUnpackFeatures(ref, _d); - uint64_t oldOffset = _d.getReadOffset(); - _d.skipFeatures(1); - uint64_t newOffset = _d.getReadOffset(); - uint64_t bitLen = (newOffset - oldOffset); - assert(static_cast(bitLen) > 0); - return bitLen; -} - -datastore::EntryRef -FeatureStore::moveFeatures(uint32_t packedIndex, datastore::EntryRef ref) -{ - uint64_t bitLen = bitSize(packedIndex, ref); - return moveFeatures(ref, bitLen); -} - -} diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.h b/searchlib/src/vespa/searchlib/memoryindex/featurestore.h deleted file mode 100644 index 94d44eaf44d..00000000000 --- a/searchlib/src/vespa/searchlib/memoryindex/featurestore.h +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include -#include -#include -#include - -namespace search::memoryindex { - -class FeatureStore { -public: - using DataStoreType = datastore::DataStoreT>; - using RefType = DataStoreType::RefType; - using EncodeContext = bitcompression::EG2PosOccEncodeContext; - using DecodeContextCooked = bitcompression::EG2PosOccDecodeContextCooked; - using generation_t = vespalib::GenerationHandler::generation_t; - -private: - using Schema = index::Schema; - using DocIdAndFeatures = index::DocIdAndFeatures; - using PosOccFieldsParams = bitcompression::PosOccFieldsParams; - - static const uint32_t DECODE_SAFETY = 16; - - DataStoreType _store; - - // Feature Encoder - EncodeContext _f; - // Buffer for compressed features. - ComprFileWriteContext _fctx; - - // Feature Decoder - DecodeContextCooked _d; - - // Coding parameters for fields and field collections, derived - // from schema. - std::vector _fieldsParams; - - const Schema &_schema; - - datastore::BufferType _type; - const uint32_t _typeId; - - /** - * Writes the given features to the underlying encode context. - * - * @param packedIndex the field or field collection owning features - * @param features the features to be encoded - * @return the encode offset before writing - */ - uint64_t writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &features); - - /** - * Adds the features from the given buffer to the data store. - * - * @param src buffer with features - * @param byteLen the byte length of the buffer - * @return the entry ref for the added features - */ - datastore::EntryRef addFeatures(const uint8_t * src, uint64_t byteLen); - - /** - * Adds the features currently in the underlying encode context to the data store. - * - * @param beginOffset the begin offset into the encode context - * @param endOffset the end offset into the encode context - * @return the entry ref and bit length of the features - */ - std::pair addFeatures(uint64_t beginOffset, uint64_t endOffset); - - /** - * Moves features to new location, as part of compaction. - * - * @param ref old reference to stored features - * @param bitLen bit length of features to move - * @return new reference to stored features - */ - datastore::EntryRef moveFeatures(datastore::EntryRef ref, uint64_t bitLen); - -public: - - /** - * Constructor for feature store. - * - * @param schema The schema describing fields and field - * collections available, used to derive - * coding parameters. - */ - FeatureStore(const Schema &schema); - - ~FeatureStore(); - - /** - * Add features to feature store - * - * @param packedIndex The field or field collection owning features - * @param features The features to be encoded - * @return pair with reference to stored features and - * size of encoded features in bits - */ - std::pair addFeatures(uint32_t packedIndex, const DocIdAndFeatures &features); - - - /** - * Get features from feature store. Method signature is not - * const since feature decoder is written to during calculation. - * - * @param packedIndex The field or field collection owning features - * @param ref Reference to stored features - * @param features The features to be decoded - */ - void getFeatures(uint32_t packedIndex, datastore::EntryRef ref, DocIdAndFeatures &features); - - - /** - * Setup the given decoder to be used for the given field or field - * collection. - * - * @param packedIndex The field or field collection owning features - * @param decoder The feature decoder - */ - void setupForField(uint32_t packedIndex, DecodeContextCooked &decoder) const { - decoder._fieldsParams = &_fieldsParams[packedIndex]; - } - - /** - * Setup the given decoder to later use readFeatures() to decode - * the stored features. - * - * @param ref Reference to stored features - * @param decoder The feature decoder - */ - void setupForReadFeatures(datastore::EntryRef ref, DecodeContextCooked &decoder) const { - const uint8_t * bits = getBits(ref); - decoder.setByteCompr(bits); - uint32_t bufferId = RefType(ref).bufferId(); - const datastore::BufferState &state = _store.getBufferState(bufferId); - decoder.setEnd( - ((_store.getEntry(RefType(state.size(), bufferId)) - - bits) + 7) / 8, - false); - } - - /** - * Setup the given decoder to later use unpackFeatures() to decode - * the stored features. - * - * @param ref Reference to stored features - * @param decoder The feature decoder - */ - void setupForUnpackFeatures(datastore::EntryRef ref, DecodeContextCooked &decoder) const { - decoder.setByteCompr(getBits(ref)); - } - - /** - * Calculate size of encoded features. Method signature is not - * const since feature decoder is written to during calculation. - * - * @param packedIndex The field or field collection owning features - * @param ref Reference to stored features - * @return size of features in bits - */ - size_t bitSize(uint32_t packedIndex, datastore::EntryRef ref); - - /** - * Get byte address of stored features - * - * @param ref Referennce to stored features - * @return byte address of stored features - */ - const uint8_t *getBits(datastore::EntryRef ref) const { - RefType iRef(ref); - return _store.getEntry(iRef); - } - - /** - * Move features to new location, as part of compaction. - * - * @param packedIndex The field or field collection owning features - * @param ref Old reference to stored features - * @return New reference to stored features - */ - datastore::EntryRef moveFeatures(uint32_t packedIndex, datastore::EntryRef ref); - - /** - * Return a const view of the fields params used by this feature store. - * - * @return const view of fields params. - */ - const std::vector &getFieldsParams() const { return _fieldsParams; } - - void trimHoldLists(generation_t usedGen) { _store.trimHoldLists(usedGen); } - void transferHoldLists(generation_t generation) { _store.transferHoldLists(generation); } - void clearHoldLists() { _store.clearHoldLists();} - std::vector startCompact() { return _store.startCompact(_typeId); } - void finishCompact(const std::vector & toHold) { _store.finishCompact(toHold); } - MemoryUsage getMemoryUsage() const { return _store.getMemoryUsage(); } - datastore::DataStoreBase::MemStats getMemStats() const { return _store.getMemStats(); } -}; - -} diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 75c4f6c49eb..999b1bcd3b7 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -2,7 +2,7 @@ #pragma once -#include "featurestore.h" +#include "feature_store.h" #include "field_index_remover.h" #include "wordstore.h" #include diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h index 7fa46fc7531..f0363500559 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h @@ -4,7 +4,7 @@ #include "fakeword.h" #include "fakeposting.h" #include "fpfactory.h" -#include +#include #include #include #include -- cgit v1.2.3