diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-04-30 12:18:43 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-04-30 13:34:52 +0000 |
commit | 4f0c3f4af63676003da3c2a4d695f8448e742082 (patch) | |
tree | 1a926552d84f8135a39e0ad8c07be763810b03a2 /searchlib | |
parent | bd2c1c1e13ab2893dad455c4b96a667336fbdc58 (diff) |
Make member variables in DocIdAndFeatures non-public.
Diffstat (limited to 'searchlib')
20 files changed, 163 insertions, 161 deletions
diff --git a/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp index 4e4d90e6871..90953f78c40 100644 --- a/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp +++ b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp @@ -94,15 +94,13 @@ unpackFeatures(std::vector<PosEntry> &entries, uint64_t wordNum, const DocIdAndFeatures &features) { - std::vector<search::index::WordDocElementFeatures>::const_iterator - element = features._elements.begin(); - std::vector<search::index::WordDocElementWordPosFeatures>:: - const_iterator position = features._wordPositions.begin(); - uint32_t numElements = features._elements.size(); + auto element = features.elements().begin(); + auto position = features.word_positions().begin(); + uint32_t numElements = features.elements().size(); while (numElements--) { uint32_t numOccs = element->getNumOccs(); while (numOccs--) { - entries.push_back(PosEntry(features._docId, + entries.push_back(PosEntry(features.doc_id(), fieldId, element->getElementId(), position->getWordPos(), @@ -447,7 +445,7 @@ ShowPostingListSubApp::readPostings(const SchemaUtil::IndexIterator &index, if (r.isValid()) r.read(); while (r.isValid()) { - uint32_t docId = r._docIdAndFeatures._docId; + uint32_t docId = r._docIdAndFeatures.doc_id(); if (docId >= _minDocId && docId < _docIdLimit) { unpackFeatures(entries, index.getIndex(), r._wordNum, r._docIdAndFeatures); diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp index e33158e559f..fab2ed734cd 100644 --- a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp @@ -62,10 +62,10 @@ FieldWriterWrapper & FieldWriterWrapper::add(uint32_t docId) { DocIdAndFeatures daf; - daf._docId = docId; - daf._elements.push_back(WordDocElementFeatures(0)); - daf._elements.back().setNumOccs(1); - daf._wordPositions.push_back(WordDocElementWordPosFeatures(0)); + daf.set_doc_id(docId); + daf.elements().emplace_back(0); + daf.elements().back().setNumOccs(1); + daf.word_positions().emplace_back(0); //LOG(info, "add(%" PRIu64 ", %u)", wordNum, docId); _writer.add(daf); return *this; diff --git a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp index 49e9d613861..aca83d67a8a 100644 --- a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp @@ -41,27 +41,27 @@ Test::assertFeatures(const DocIdAndFeatures &exp, const DocIdAndFeatures &act) { // docid is not encoded as part of features - if (!EXPECT_EQUAL(exp._elements.size(), - act._elements.size())) + if (!EXPECT_EQUAL(exp.elements().size(), + act.elements().size())) return false; - for (size_t i = 0; i < exp._elements.size(); ++i) { - if (!EXPECT_EQUAL(exp._elements[i]._elementId, - act._elements[i]._elementId)) + for (size_t i = 0; i < exp.elements().size(); ++i) { + if (!EXPECT_EQUAL(exp.elements()[i].getElementId(), + act.elements()[i].getElementId())) return false; - if (!EXPECT_EQUAL(exp._elements[i]._numOccs, - act._elements[i]._numOccs)) + if (!EXPECT_EQUAL(exp.elements()[i].getNumOccs(), + act.elements()[i].getNumOccs())) return false; - if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight)) + if (!EXPECT_EQUAL(exp.elements()[i].getWeight(), act.elements()[i].getWeight())) return false; - if (!EXPECT_EQUAL(exp._elements[i]._elementLen, - act._elements[i]._elementLen)) + if (!EXPECT_EQUAL(exp.elements()[i].getElementLen(), + act.elements()[i].getElementLen())) return false; } - if (!EXPECT_EQUAL(exp._wordPositions.size(), act._wordPositions.size())) + if (!EXPECT_EQUAL(exp.word_positions().size(), act.word_positions().size())) return false; - for (size_t i = 0; i < exp._wordPositions.size(); ++i) { - if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos, - act._wordPositions[i]._wordPos)) return false; + for (size_t i = 0; i < exp.word_positions().size(); ++i) { + if (!EXPECT_EQUAL(exp.word_positions()[i].getWordPos(), + act.word_positions()[i].getWordPos())) return false; } return true; } @@ -73,13 +73,13 @@ getFeatures(uint32_t numOccs, uint32_t elemLen) { DocIdAndFeatures f; - f._docId = 0; - f._elements.push_back(WordDocElementFeatures(0)); - f._elements.back().setNumOccs(numOccs); - f._elements.back().setWeight(weight); - f._elements.back().setElementLen(elemLen); + f.set_doc_id(0); + f.elements().push_back(WordDocElementFeatures(0)); + f.elements().back().setNumOccs(numOccs); + f.elements().back().setWeight(weight); + f.elements().back().setElementLen(elemLen); for (uint32_t i = 0; i < numOccs; ++i) { - f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + f.word_positions().push_back(WordDocElementWordPosFeatures(i)); } return f; } diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index e2401c1ad7c..2b9b77d32a3 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -99,10 +99,10 @@ public: if (!_firstDoc) { _ss << ","; } - _ss << "d=" << features._docId << "["; + _ss << "d=" << features.doc_id() << "["; bool first_elem = true; size_t word_pos_offset = 0; - for (const auto& elem : features._elements) { + for (const auto& elem : features.elements()) { if (!first_elem) { _ss << ","; } @@ -112,7 +112,7 @@ public: if (!first_pos) { _ss << ","; } - _ss << features._wordPositions[i + word_pos_offset].getWordPos(); + _ss << features.word_positions()[i + word_pos_offset].getWordPos(); first_pos = false; } word_pos_offset += elem.getNumOccs(); @@ -601,12 +601,10 @@ addElement(DocIdAndFeatures &f, uint32_t numOccs, int32_t weight = 1) { - f._elements.push_back(WordDocElementFeatures(f._elements.size())); - f._elements.back().setElementLen(elemLen); - f._elements.back().setWeight(weight); - f._elements.back().setNumOccs(numOccs); + f.elements().emplace_back(f.elements().size(), weight, elemLen); + f.elements().back().setNumOccs(numOccs); for (uint32_t i = 0; i < numOccs; ++i) { - f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + f.word_positions().emplace_back(i); } } @@ -679,11 +677,11 @@ TEST_F(FieldIndexCollectionTest, require_that_basic_dumping_to_index_builder_is_ b.startField(4); b.startWord("a"); DocIdAndFeatures features; - features._docId = 2; - features._elements.emplace_back(0, 10, 20); - features._elements.back().setNumOccs(2); - features._wordPositions.emplace_back(1); - features._wordPositions.emplace_back(3); + features.set_doc_id(2); + features.elements().emplace_back(0, 10, 20); + features.elements().back().setNumOccs(2); + features.word_positions().emplace_back(1); + features.word_positions().emplace_back(3); b.add_document(features); b.endWord(); b.endField(); diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp index d4f663f32cc..3f7b3e05287 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp @@ -12,8 +12,6 @@ LOG_SETUP(".posocccompression"); using search::index::DocIdAndFeatures; -using search::index::WordDocElementFeatures; -using search::index::WordDocElementWordPosFeatures; using search::index::PostingListParams; using search::index::SchemaUtil; using search::index::Schema; @@ -344,7 +342,7 @@ readFeatures(search::index::DocIdAndFeatures &features) const uint64_t *valE = _valE; features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead); - features.setRaw(true); + features.set_has_raw_data(true); const uint64_t *rawFeatures = (oPreRead == 0) ? (oCompr - 1) : (oCompr - 2); uint64_t rawFeaturesStartBitPos = @@ -373,7 +371,7 @@ readFeatures(search::index::DocIdAndFeatures &features) } if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -394,7 +392,7 @@ readFeatures(search::index::DocIdAndFeatures &features) do { if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -410,7 +408,7 @@ readFeatures(search::index::DocIdAndFeatures &features) for (uint32_t pos = 1; pos < numPositions; ++pos) { if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -429,9 +427,9 @@ readFeatures(search::index::DocIdAndFeatures &features) _fileReadBias + (reinterpret_cast<unsigned long>(oCompr) << 3) - oPreRead; - features._bitLength = rawFeaturesEndBitPos - rawFeaturesStartBitPos; + features.set_bit_length(rawFeaturesEndBitPos - rawFeaturesStartBitPos); while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } if (__builtin_expect(oCompr >= valE, false)) { @@ -451,7 +449,7 @@ readFeatures(search::index::DocIdAndFeatures &features) const uint64_t *valE = _valE; features.clearFeatures(); - features.setRaw(false); + features.set_has_raw_data(false); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; uint32_t numElements = 1; @@ -470,14 +468,13 @@ readFeatures(search::index::DocIdAndFeatures &features) EC); elementId += static_cast<uint32_t>(val64); } - features._elements. - push_back(WordDocElementFeatures(elementId)); + features.elements().emplace_back(elementId); if (fieldParams._hasElementWeights) { UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_ELEMENTWEIGHT, EC); int32_t elementWeight = this->convertToSigned(val64); - features._elements.back().setWeight(elementWeight); + features.elements().back().setWeight(elementWeight); } if (__builtin_expect(oCompr >= valE, false)) { UC64_DECODECONTEXT_STORE(o, _); @@ -489,7 +486,7 @@ readFeatures(search::index::DocIdAndFeatures &features) K_VALUE_POSOCC_ELEMENTLEN, EC); uint32_t elementLen = static_cast<uint32_t>(val64) + 1; - features._elements.back().setElementLen(elementLen); + features.elements().back().setElementLen(elementLen); UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_NUMPOSITIONS, EC); @@ -507,9 +504,8 @@ readFeatures(search::index::DocIdAndFeatures &features) K_VALUE_POSOCC_FIRST_WORDPOS, EC); wordPos = static_cast<uint32_t>(val64); - features._elements.back().incNumOccs(); - features._wordPositions.push_back( - WordDocElementWordPosFeatures(wordPos)); + features.elements().back().incNumOccs(); + features.word_positions().emplace_back(wordPos); } while (0); for (uint32_t pos = 1; pos < numPositions; ++pos) { if (__builtin_expect(oCompr >= valE, false)) { @@ -522,9 +518,8 @@ readFeatures(search::index::DocIdAndFeatures &features) K_VALUE_POSOCC_DELTA_WORDPOS, EC); wordPos += 1 + static_cast<uint32_t>(val64); - features._elements.back().incNumOccs(); - features._wordPositions.push_back( - WordDocElementWordPosFeatures(wordPos)); + features.elements().back().incNumOccs(); + features.word_positions().emplace_back(wordPos); } } UC64_DECODECONTEXT_STORE(o, _); @@ -732,23 +727,19 @@ void EG2PosOccEncodeContext<bigEndian>:: writeFeatures(const search::index::DocIdAndFeatures &features) { - if (features.getRaw()) { - writeBits(&features._blob[0], - features._bitOffset, features._bitLength); + if (features.has_raw_data()) { + writeBits(features.blob().data(), + features.bit_offset(), features.bit_length()); return; } - typedef WordDocElementFeatures Elements; - typedef WordDocElementWordPosFeatures Positions; - std::vector<Elements>::const_iterator element = features._elements.begin(); - - std::vector<Positions>::const_iterator position = - features._wordPositions.begin(); + auto element = features.elements().begin(); + auto position = features.word_positions().begin(); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; - uint32_t numElements = features._elements.size(); + uint32_t numElements = features.elements().size(); if (fieldParams._hasElements) { assert(numElements > 0u); encodeExpGolomb(numElements - 1, @@ -855,7 +846,7 @@ readFeatures(search::index::DocIdAndFeatures &features) const uint64_t *valE = _valE; features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead); - features.setRaw(true); + features.set_has_raw_data(true); const uint64_t *rawFeatures = (oPreRead == 0) ? (oCompr - 1) : (oCompr - 2); uint64_t rawFeaturesStartBitPos = @@ -885,7 +876,7 @@ readFeatures(search::index::DocIdAndFeatures &features) } if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -910,7 +901,7 @@ readFeatures(search::index::DocIdAndFeatures &features) for (uint32_t pos = 0; pos < numPositions; ++pos) { if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -929,9 +920,9 @@ readFeatures(search::index::DocIdAndFeatures &features) _fileReadBias + (reinterpret_cast<unsigned long>(oCompr) << 3) - oPreRead; - features._bitLength = rawFeaturesEndBitPos - rawFeaturesStartBitPos; + features.set_bit_length(rawFeaturesEndBitPos - rawFeaturesStartBitPos); while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } if (__builtin_expect(oCompr >= valE, false)) { @@ -951,7 +942,7 @@ readFeatures(search::index::DocIdAndFeatures &features) const uint64_t *valE = _valE; features.clearFeatures(); - features.setRaw(false); + features.set_has_raw_data(false); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; @@ -972,14 +963,13 @@ readFeatures(search::index::DocIdAndFeatures &features) EC); elementId += static_cast<uint32_t>(val64); } - features._elements. - push_back(WordDocElementFeatures(elementId)); + features.elements().emplace_back(elementId); if (fieldParams._hasElementWeights) { UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_ELEMENTWEIGHT, EC); int32_t elementWeight = this->convertToSigned(val64); - features._elements.back().setWeight(elementWeight); + features.elements().back().setWeight(elementWeight); } if (__builtin_expect(oCompr >= valE, false)) { UC64_DECODECONTEXT_STORE(o, _); @@ -991,13 +981,13 @@ readFeatures(search::index::DocIdAndFeatures &features) elementLenK, EC); uint32_t elementLen = static_cast<uint32_t>(val64) + 1; - features._elements.back().setElementLen(elementLen); + features.elements().back().setElementLen(elementLen); UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_NUMPOSITIONS, EC); uint32_t numPositions = static_cast<uint32_t>(val64) + 1; - features._bitLength = numPositions * 64; + features.set_bit_length(numPositions * 64); uint32_t wordPosK = EGPosOccEncodeContext<bigEndian>:: calcWordPosK(numPositions, elementLen); @@ -1014,9 +1004,8 @@ readFeatures(search::index::DocIdAndFeatures &features) wordPosK, EC); wordPos += 1 + static_cast<uint32_t>(val64); - features._elements.back().incNumOccs(); - features._wordPositions.push_back( - WordDocElementWordPosFeatures(wordPos)); + features.elements().back().incNumOccs(); + features.word_positions().emplace_back(wordPos); } } UC64_DECODECONTEXT_STORE(o, _); @@ -1227,23 +1216,19 @@ void EGPosOccEncodeContext<bigEndian>:: writeFeatures(const search::index::DocIdAndFeatures &features) { - if (features.getRaw()) { - writeBits(&features._blob[0], - features._bitOffset, features._bitLength); + if (features.has_raw_data()) { + writeBits(features.blob().data(), + features.bit_offset(), features.bit_length()); return; } - typedef WordDocElementFeatures Elements; - typedef WordDocElementWordPosFeatures Positions; - - std::vector<Elements>::const_iterator element = features._elements.begin(); - std::vector<Positions>::const_iterator position = - features._wordPositions.begin(); + auto element = features.elements().begin(); + auto position = features.word_positions().begin(); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; uint32_t elementLenK = calcElementLenK(fieldParams._avgElemLen); - uint32_t numElements = features._elements.size(); + uint32_t numElements = features.elements().size(); if (fieldParams._hasElements) { assert(numElements > 0u); encodeExpGolomb(numElements - 1, diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h index a5d46045ec5..d500dacd7d4 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h @@ -48,9 +48,9 @@ public: assert(elementLen == _elements.back().getElementLen()); } assert(_elements.back().getNumOccs() == 0 || - wordPos > _wordPositions.back().getWordPos()); + wordPos > _word_positions.back().getWordPos()); _elements.back().incNumOccs(); - _wordPositions.emplace_back(wordPos); + _word_positions.emplace_back(wordPos); } }; diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp index 96b106a15da..a41f0412294 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp @@ -63,7 +63,7 @@ void FieldReader::readDocIdAndFeatures() { _oldposoccfile->readDocIdAndFeatures(_docIdAndFeatures); - _docIdAndFeatures._docId = _docIdMapper.mapDocId(_docIdAndFeatures._docId); + _docIdAndFeatures.set_doc_id(_docIdMapper.mapDocId(_docIdAndFeatures.doc_id())); } @@ -75,13 +75,13 @@ FieldReader::read() readCounts(); if (_wordNum == noWordNumHigh()) { assert(_residue == 0); - _docIdAndFeatures._docId = NO_DOC; + _docIdAndFeatures.set_doc_id(NO_DOC); return; } } --_residue; readDocIdAndFeatures(); - if (_docIdAndFeatures._docId != NO_DOC) { + if (_docIdAndFeatures.doc_id() != NO_DOC) { return; } } @@ -267,26 +267,26 @@ FieldReaderStripInfo::read() if (_wordNum == noWordNumHigh()) { return; } - assert(!features.getRaw()); - uint32_t numElements = features._elements.size(); + assert(!features.has_raw_data()); + uint32_t numElements = features.elements().size(); assert(numElements > 0); std::vector<Element>::iterator element = - features._elements.begin(); + features.elements().begin(); if (_hasElements) { if (!_hasElementWeights) { for (uint32_t elementDone = 0; elementDone < numElements; ++elementDone, ++element) { element->setWeight(1); } - assert(element == features._elements.end()); + assert(element == features.elements().end()); } } else { if (element->getElementId() != 0) { continue; // Drop this entry, try to read new entry } element->setWeight(1); - features._wordPositions.resize(element->getNumOccs()); + features.word_positions().resize(element->getNumOccs()); if (numElements > 1) { - features._elements.resize(1); + features.elements().resize(1); } } break; diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.h b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h index a73ffa149a9..50748d037c0 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h @@ -85,7 +85,7 @@ public: bool operator<(const FieldReader &rhs) const { return _wordNum < rhs._wordNum || (_wordNum == rhs._wordNum && - _docIdAndFeatures._docId < rhs._docIdAndFeatures._docId); + _docIdAndFeatures.doc_id() < rhs._docIdAndFeatures.doc_id()); } virtual void setup(const WordNumMapping &wordNumMapping, const DocIdMapping &docIdMapping); diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h index 1e9afb717e8..e5aa9788071 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h @@ -58,11 +58,11 @@ public: void newWord(vespalib::stringref word); void add(const DocIdAndFeatures &features) { - assert(features._docId < _docIdLimit); - assert(features._docId > _prevDocId); + assert(features.doc_id() < _docIdLimit); + assert(features.doc_id() > _prevDocId); _posoccfile->writeDocIdAndFeatures(features); - _bvc.add(features._docId); - _prevDocId = features._docId; + _bvc.add(features.doc_id()); + _prevDocId = features.doc_id(); } uint64_t getSparseWordNum() const { return _wordNum; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp index 30cef1dc258..c0e1115521c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp @@ -66,7 +66,7 @@ Zc4PostingReader<bigEndian>::read_common_word_doc_id_and_features(DocIdAndFeatur assert(_zcDocIds._valI < _zcDocIds._valE); uint32_t docIdPos = _zcDocIds.pos(); uint32_t docId = _prevDocId + 1 + _zcDocIds.decode(); - features._docId = docId; + features.set_doc_id(docId); _prevDocId = docId; assert(docId <= _lastDocId); if (docId > _l1SkipDocId) { @@ -179,7 +179,7 @@ Zc4PostingReader<bigEndian>::read_doc_id_and_features(DocIdAndFeatures &features UC64_DECODEEXPGOLOMB_SMALL_NS(o, _docIdK, EC); uint32_t docId = _prevDocId + 1 + val64; - features._docId = docId; + features.set_doc_id(docId); _prevDocId = docId; UC64_DECODECONTEXT_STORE(o, d._); if (__builtin_expect(oCompr >= d._valE, false)) { diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp index 477db7095ed..78d18cb5550 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp @@ -153,11 +153,11 @@ Zc4PostingWriter<bigEndian>::write_docid_and_features(const DocIdAndFeatures &fe uint64_t writeOffset = _encode_features->getWriteOffset(); uint64_t featureSize = writeOffset - _featureOffset; assert(static_cast<uint32_t>(featureSize) == featureSize); - _docIds.push_back(std::make_pair(features._docId, + _docIds.push_back(std::make_pair(features.doc_id(), static_cast<uint32_t>(featureSize))); _featureOffset = writeOffset; } else { - _docIds.push_back(std::make_pair(features._docId, uint32_t(0))); + _docIds.push_back(std::make_pair(features.doc_id(), uint32_t(0))); } } diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp index 35a8c1338dd..07b4da8a85f 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp @@ -7,14 +7,15 @@ LOG_SETUP(".index.docidandfeatures"); namespace search::index { DocIdAndFeatures::DocIdAndFeatures() - : _docId(0), + : _doc_id(0), _elements(), - _wordPositions(), + _word_positions(), _blob(), - _bitOffset(0u), - _bitLength(0u), - _raw(false) -{ } + _bit_offset(0u), + _bit_length(0u), + _has_raw_data(false) +{ +} DocIdAndFeatures::DocIdAndFeatures(const DocIdAndFeatures &) = default; DocIdAndFeatures & DocIdAndFeatures::operator = (const DocIdAndFeatures &) = default; diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.h b/searchlib/src/vespa/searchlib/index/docidandfeatures.h index 816358a0380..6a619e8bbbc 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.h +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.h @@ -19,13 +19,13 @@ namespace search::index { * Present as vector element in DocIdAndFeatures. */ class WordDocElementFeatures { -public: +private: uint32_t _elementId; // Array index uint32_t _numOccs; int32_t _weight; uint32_t _elementLen; - // TODO: add support for user features +public: WordDocElementFeatures() : _elementId(0u), _numOccs(0u), @@ -67,9 +67,10 @@ public: * Present as vector element in DocIdAndFeatures. */ class WordDocElementWordPosFeatures { -public: +private: uint32_t _wordPos; +public: WordDocElementWordPosFeatures() : _wordPos(0u) {} @@ -90,15 +91,20 @@ public: */ class DocIdAndFeatures { public: - uint32_t _docId; // Current Docid + using RawData = std::vector<uint64_t>; + +protected: + uint32_t _doc_id; // Current document id std::vector<WordDocElementFeatures> _elements; - std::vector<WordDocElementWordPosFeatures> _wordPositions; - // raw data (file format specific, packed) - std::vector<uint64_t> _blob; // Feature data for (word, docid) pair - uint32_t _bitOffset; // Offset of feature start ([0..63]) - uint32_t _bitLength; // Length of features - bool _raw; // + std::vector<WordDocElementWordPosFeatures> _word_positions; + // Raw data (file format specific, packed) + RawData _blob; // Feature data for (word, docid) pair + uint32_t _bit_offset; // Offset of feature start ([0..63]) + uint32_t _bit_length; // Length of features + bool _has_raw_data; + +public: DocIdAndFeatures(); DocIdAndFeatures(const DocIdAndFeatures &); DocIdAndFeatures & operator = (const DocIdAndFeatures &); @@ -108,33 +114,47 @@ public: void clearFeatures() { _elements.clear(); - _wordPositions.clear(); - _bitOffset = 0u; - _bitLength = 0u; + _word_positions.clear(); + _bit_offset = 0u; + _bit_length = 0u; _blob.clear(); } void clearFeatures(uint32_t bitOffset) { _elements.clear(); - _wordPositions.clear(); - _bitOffset = bitOffset; - _bitLength = 0u; + _word_positions.clear(); + _bit_offset = bitOffset; + _bit_length = 0u; _blob.clear(); } void clear(uint32_t docId) { - _docId = docId; + _doc_id = docId; clearFeatures(); } void clear(uint32_t docId, uint32_t bitOffset) { - _docId = docId; + _doc_id = docId; clearFeatures(bitOffset); } - void setRaw(bool raw) { _raw = raw; } - bool getRaw() const { return _raw; } + uint32_t doc_id() const { return _doc_id; } + void set_doc_id(uint32_t val) { _doc_id = val; } + + const std::vector<WordDocElementFeatures>& elements() const { return _elements; } + std::vector<WordDocElementFeatures>& elements() { return _elements; } + + const std::vector<WordDocElementWordPosFeatures>& word_positions() const { return _word_positions; } + std::vector<WordDocElementWordPosFeatures>& word_positions() { return _word_positions; } + + const RawData& blob() const { return _blob; } + RawData& blob() { return _blob; } + uint32_t bit_offset() const { return _bit_offset; } + uint32_t bit_length() const { return _bit_length; } + void set_bit_length(uint32_t val) { _bit_length = val; } + bool has_raw_data() const { return _has_raw_data; } + void set_has_raw_data(bool val) { _has_raw_data = val; } }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp index 974fcc01c36..1d55ed76a09 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp @@ -21,7 +21,7 @@ FeatureStore::writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &featur oldOffset = 0; assert(_f.getWriteOffset() == oldOffset); } - assert(!features.getRaw()); + assert(!features.has_raw_data()); _f.writeFeatures(features); return oldOffset; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 447187e5af7..e79cab28dec 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -171,7 +171,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder) EntryRef featureRef(pitr.getData()); _featureStore.setupForReadFeatures(featureRef, decoder); decoder.readFeatures(features); - features._docId = docId; + features.set_doc_id(docId); indexBuilder.add_document(features); } } else { @@ -183,7 +183,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder) EntryRef featureRef(kd->getData()); _featureStore.setupForReadFeatures(featureRef, decoder); decoder.readFeatures(features); - features._docId = docId; + features.set_doc_id(docId); indexBuilder.add_document(features); } } diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp index 28c5b1fa5df..1e25878a33e 100644 --- a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp +++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp @@ -43,9 +43,9 @@ struct Builder addDoc(uint32_t docId) { _features.clear(docId); - _features._elements.emplace_back(0, 1, 1); - _features._elements.back().setNumOccs(1); - _features._wordPositions.emplace_back(0); + _features.elements().emplace_back(0, 1, 1); + _features.elements().back().setNumOccs(1); + _features.word_positions().emplace_back(0); _ib.add_document(_features); } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp index 9cbbd136148..d59417a1e78 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp @@ -206,7 +206,7 @@ FakeMemTreeOccMgr::add(uint32_t wordIdx, index::DocIdAndFeatures &features) _featureSizes[wordIdx] += RefType::align((r.second + 7) / 8) * 8; - _unflushed.push_back(PendingOp(wordIdx, features._docId, r.first)); + _unflushed.push_back(PendingOp(wordIdx, features.doc_id(), r.first)); if (_unflushed.size() >= 10000) flush(); diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp index 1fa518af28f..8f6c16658c9 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp @@ -584,7 +584,7 @@ FakeWord::validate(FieldReader &fieldReader, for (residue = numDocs; residue > 0; --residue) { assert(fieldReader._wordNum == wordNum); DocIdAndFeatures &features(fieldReader._docIdAndFeatures); - docId = features._docId; + docId = features.doc_id(); assert(d != de); assert(d->_docId == docId); if (matchData.valid()) { @@ -598,15 +598,15 @@ FakeWord::validate(FieldReader &fieldReader, typedef WordDocElementWordPosFeatures Positions; std::vector<Elements>::const_iterator element = - features._elements.begin(); + features.elements().begin(); std::vector<Positions>::const_iterator position = - features._wordPositions.begin(); + features.word_positions().begin(); TermFieldMatchData *tfmd = matchData[0]; assert(tfmd != 0); - tfmd->reset(features._docId); + tfmd->reset(features.doc_id()); - uint32_t elementResidue = features._elements.size(); + uint32_t elementResidue = features.elements().size(); while (elementResidue != 0) { uint32_t positionResidue = element->getNumOccs(); while (positionResidue != 0) { diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index f8fe31773c2..f6c6e5a64f3 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -267,16 +267,16 @@ FakeZcFilterOcc::validate_read(const FakeWord &fw, bool encode_features, bool dy check_features.clear(doc._docId); } reader.read_doc_id_and_features(features); - assert(features._docId == doc._docId); - assert(features._elements.size() == check_features._elements.size()); - assert(features._wordPositions.size() == check_features._wordPositions.size()); + assert(features.doc_id() == doc._docId); + assert(features.elements().size() == check_features.elements().size()); + assert(features.word_positions().size() == check_features.word_positions().size()); ++hits; } if (encode_features) { assert(word_pos_iterator == word_pos_iterator_end); } reader.read_doc_id_and_features(features); - assert(static_cast<int32_t>(features._docId) == -1); + assert(static_cast<int32_t>(features.doc_id()) == -1); } FakeZcFilterOcc::~FakeZcFilterOcc() diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h index 08473f9fc6c..a341e36045e 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h @@ -53,9 +53,9 @@ public: _ss << "a=" << docId; if (_verbose) { _ss << "("; - auto wpi = features._wordPositions.begin(); + auto wpi = features.word_positions().begin(); bool firstElement = true; - for (auto &el : features._elements) { + for (auto &el : features.elements()) { if (!firstElement) { _ss << ","; } |