diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-05-31 09:29:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-31 09:29:44 +0200 |
commit | 530df929b49dc0285ee31fa74c1bcfc908f787f1 (patch) | |
tree | 9d16a1d8d8be53d2d77be02968dec07fc37fcc42 /searchlib | |
parent | cb165b874d9d7bdc31c0081d7ec26bb6cd963bac (diff) | |
parent | 42c74a36e5ef0cc97b8c20f753fb34503bf32d40 (diff) | |
Merge pull request #9612 from vespa-engine/toregge/calulate-field-length-in-field-inverter
Calculate field length in field inverter.
Diffstat (limited to 'searchlib')
4 files changed, 69 insertions, 8 deletions
diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp index f08e61b0da2..a818bb75bf2 100644 --- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp @@ -98,6 +98,16 @@ makeDoc16(DocBuilder &b) return b.endDocument(); } +Document::UP +makeDoc17(DocBuilder &b) +{ + b.startDocument("doc::17"); + b.startIndexField("f1").addStr("foo0").addStr("bar0").endField(); + b.startIndexField("f2").startElement(1).addStr("foo").addStr("bar").endElement().startElement(1).addStr("bar").endElement().endField(); + b.startIndexField("f3").startElement(3).addStr("foo2").addStr("bar2").endElement().startElement(4).addStr("bar2").endElement().endField(); + return b.endDocument(); +} + } struct Fixture @@ -323,6 +333,23 @@ TEST_F("require that multiple words at same position works", Fixture) f._inserter.toStr()); } +TEST_F("require that cheap features are calculated", Fixture) +{ + f.invertDocument(17, *makeDoc17(f._b)); + f._inserter.setVerbose(); + f._inserter.set_show_cheap_features(); + f.pushDocuments(); + EXPECT_EQUAL("f=1," + "w=bar0,a=17(fl=2,occs=1,e=0,w=1,l=2[1])," + "w=foo0,a=17(fl=2,occs=1,e=0,w=1,l=2[0])," + "f=2," + "w=bar,a=17(fl=3,occs=2,e=0,w=1,l=2[1],e=1,w=1,l=1[0])," + "w=foo,a=17(fl=3,occs=1,e=0,w=1,l=2[0])," + "f=3," + "w=bar2,a=17(fl=3,occs=2,e=0,w=3,l=2[1],e=1,w=4,l=1[0])," + "w=foo2,a=17(fl=3,occs=1,e=0,w=3,l=2[0])", + f._inserter.toStr()); +} } // namespace memoryindex } // namespace search diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp index d19f05a98ee..bfa0143d395 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp @@ -279,6 +279,29 @@ FieldInverter::remove(const vespalib::stringref word, 
uint32_t docId) } void +FieldInverter::endDoc() +{ + uint32_t field_length = 0; + if (_elem > 0) { + auto itr = _elems.end() - _elem; + while (itr != _elems.end()) { + field_length += itr->_len; + ++itr; + } + itr = _elems.end() - _elem; + while (itr != _elems.end()) { + itr->set_field_length(field_length); + ++itr; + } + } + uint32_t newPosSize = static_cast<uint32_t>(_positions.size()); + _pendingDocs.insert({ _docId, + { _oldPosSize, newPosSize - _oldPosSize } }); + _docId = 0; + _oldPosSize = newPosSize; +} + +void FieldInverter::processNormalDocTextField(const StringFieldValue &field) { startElement(1); @@ -500,6 +523,7 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) (void) numWordIds; if (lastWordNum != i._wordNum || lastDocId != i._docId) { if (!emptyFeatures) { + _features.set_num_occs(_features.word_positions().size()); inserter.add(lastDocId, _features); emptyFeatures = true; } @@ -520,6 +544,8 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) _features.clear(lastDocId); lastElemId = NO_ELEMENT_ID; lastWordPos = NO_WORD_POS; + const ElemInfo &elem = _elems[i._elemRef]; + _features.set_field_length(elem.get_field_length()); } else { continue; // ignore dup remove } @@ -539,6 +565,7 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) } if (!emptyFeatures) { + _features.set_num_occs(_features.word_positions().size()); inserter.add(lastDocId, _features); } inserter.flush(); diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h index ba6a0e96698..e547dbe98c6 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h @@ -99,14 +99,18 @@ private: public: int32_t _weight; uint32_t _len; + uint32_t _field_length; ElemInfo(int32_t weight) : _weight(weight), - _len(0u) + _len(0u), + _field_length(0u) { } void setLen(uint32_t len) { _len = len; } + 
uint32_t get_field_length() const { return _field_length; } + void set_field_length(uint32_t field_length) { _field_length = field_length; } }; using ElemInfoVec = std::vector<ElemInfo>; @@ -317,13 +321,7 @@ public: _wpos = 0; } - void endDoc() { - uint32_t newPosSize = static_cast<uint32_t>(_positions.size()); - _pendingDocs.insert({ _docId, - { _oldPosSize, newPosSize - _oldPosSize } }); - _docId = 0; - _oldPosSize = newPosSize; - } + void endDoc(); void addWord(const vespalib::stringref word) { uint32_t wordRef = saveWord(word); diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h index a341e36045e..f984bd8fcbd 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h @@ -11,6 +11,7 @@ class OrderedFieldIndexInserter : public IOrderedFieldIndexInserter { std::stringstream _ss; bool _first; bool _verbose; + bool _show_cheap_features; uint32_t _fieldId; void @@ -27,6 +28,7 @@ public: : _ss(), _first(true), _verbose(false), + _show_cheap_features(false), _fieldId(0) { } @@ -55,6 +57,11 @@ public: _ss << "("; auto wpi = features.word_positions().begin(); bool firstElement = true; + if (_show_cheap_features) { + _ss << "fl=" << features.field_length() << + ",occs=" << features.num_occs(); + firstElement = false; + } for (auto &el : features.elements()) { if (!firstElement) { _ss << ","; @@ -70,6 +77,7 @@ public: } firstWordPos = false; _ss << wpi->getWordPos(); + ++wpi; } _ss << "]"; } @@ -105,6 +113,7 @@ public: } void setVerbose() { _verbose = true; } + void set_show_cheap_features() { _show_cheap_features = true; } }; } |