summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-05-31 09:29:44 +0200
committer GitHub <noreply@github.com> 2019-05-31 09:29:44 +0200
commit 530df929b49dc0285ee31fa74c1bcfc908f787f1 (patch)
tree 9d16a1d8d8be53d2d77be02968dec07fc37fcc42 /searchlib
parent cb165b874d9d7bdc31c0081d7ec26bb6cd963bac (diff)
parent 42c74a36e5ef0cc97b8c20f753fb34503bf32d40 (diff)
Merge pull request #9612 from vespa-engine/toregge/calulate-field-length-in-field-inverter
Calculate field length in field inverter.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp 27
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp 27
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_inverter.h 14
-rw-r--r-- searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h 9
4 files changed, 69 insertions, 8 deletions
diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
index f08e61b0da2..a818bb75bf2 100644
--- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
@@ -98,6 +98,16 @@ makeDoc16(DocBuilder &b)
return b.endDocument();
}
+Document::UP
+makeDoc17(DocBuilder &b) // Builds test document "doc::17" populating index fields f1, f2 and f3.
+{
+ b.startDocument("doc::17");
+ b.startIndexField("f1").addStr("foo0").addStr("bar0").endField(); // f1: two words added without explicit startElement/endElement
+ b.startIndexField("f2").startElement(1).addStr("foo").addStr("bar").endElement().startElement(1).addStr("bar").endElement().endField(); // f2: two elements (2 words + 1 word), both opened with startElement(1)
+ b.startIndexField("f3").startElement(3).addStr("foo2").addStr("bar2").endElement().startElement(4).addStr("bar2").endElement().endField(); // f3: same shape as f2, opened with startElement(3) / startElement(4) — presumably the element weight, confirm against DocBuilder
+ return b.endDocument();
+}
+
}
struct Fixture
@@ -323,6 +333,23 @@ TEST_F("require that multiple words at same position works", Fixture)
f._inserter.toStr());
}
+TEST_F("require that cheap features are calculated", Fixture) // Verifies the fl (field length) and occs (occurrence count) values reported for doc 17.
+{
+ f.invertDocument(17, *makeDoc17(f._b));
+ f._inserter.setVerbose();
+ f._inserter.set_show_cheap_features(); // makes the test inserter print "fl=...,occs=..." for each entry
+ f.pushDocuments();
+ EXPECT_EQUAL("f=1," // field 1: two words in total, so every entry reports fl=2
+ "w=bar0,a=17(fl=2,occs=1,e=0,w=1,l=2[1]),"
+ "w=foo0,a=17(fl=2,occs=1,e=0,w=1,l=2[0]),"
+ "f=2," // field 2: element lengths 2 and 1, so fl=3
+ "w=bar,a=17(fl=3,occs=2,e=0,w=1,l=2[1],e=1,w=1,l=1[0])," // "bar" appears once in each element, so occs=2
+ "w=foo,a=17(fl=3,occs=1,e=0,w=1,l=2[0]),"
+ "f=3," // field 3: same element lengths as field 2, with w=3 and w=4 on the elements
+ "w=bar2,a=17(fl=3,occs=2,e=0,w=3,l=2[1],e=1,w=4,l=1[0]),"
+ "w=foo2,a=17(fl=3,occs=1,e=0,w=3,l=2[0])",
+ f._inserter.toStr());
+}
} // namespace memoryindex
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
index d19f05a98ee..bfa0143d395 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
@@ -279,6 +279,29 @@ FieldInverter::remove(const vespalib::stringref word, uint32_t docId)
}
void
+FieldInverter::endDoc() // Ends the current document: stores the summed element length on each of its elements and records its range in _positions.
+{
+ uint32_t field_length = 0;
+ if (_elem > 0) {
+ auto itr = _elems.end() - _elem; // walks the last _elem entries of _elems — presumably the elements added for this document, confirm against startElement()
+ while (itr != _elems.end()) { // first pass: sum the per-element lengths
+ field_length += itr->_len;
+ ++itr;
+ }
+ itr = _elems.end() - _elem; // rewind to the same starting entry
+ while (itr != _elems.end()) { // second pass: store the total on every one of those elements
+ itr->set_field_length(field_length);
+ ++itr;
+ }
+ }
+ uint32_t newPosSize = static_cast<uint32_t>(_positions.size());
+ _pendingDocs.insert({ _docId, // keyed by doc id; value is { _oldPosSize, number of entries in _positions beyond _oldPosSize }
+ { _oldPosSize, newPosSize - _oldPosSize } });
+ _docId = 0; // reset the current doc id
+ _oldPosSize = newPosSize; // the next range starts where this one ended
+}
+
+void
FieldInverter::processNormalDocTextField(const StringFieldValue &field)
{
startElement(1);
@@ -500,6 +523,7 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter)
(void) numWordIds;
if (lastWordNum != i._wordNum || lastDocId != i._docId) {
if (!emptyFeatures) {
+ _features.set_num_occs(_features.word_positions().size());
inserter.add(lastDocId, _features);
emptyFeatures = true;
}
@@ -520,6 +544,8 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter)
_features.clear(lastDocId);
lastElemId = NO_ELEMENT_ID;
lastWordPos = NO_WORD_POS;
+ const ElemInfo &elem = _elems[i._elemRef];
+ _features.set_field_length(elem.get_field_length());
} else {
continue; // ignore dup remove
}
@@ -539,6 +565,7 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter)
}
if (!emptyFeatures) {
+ _features.set_num_occs(_features.word_positions().size());
inserter.add(lastDocId, _features);
}
inserter.flush();
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
index ba6a0e96698..e547dbe98c6 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
@@ -99,14 +99,18 @@ private:
public:
int32_t _weight;
uint32_t _len;
+ uint32_t _field_length;
ElemInfo(int32_t weight)
: _weight(weight),
- _len(0u)
+ _len(0u),
+ _field_length(0u)
{
}
void setLen(uint32_t len) { _len = len; }
+ uint32_t get_field_length() const { return _field_length; } // Returns the stored field length (0 until set_field_length() is called).
+ void set_field_length(uint32_t field_length) { _field_length = field_length; } // Stores the given field length on this element info.
};
using ElemInfoVec = std::vector<ElemInfo>;
@@ -317,13 +321,7 @@ public:
_wpos = 0;
}
- void endDoc() {
- uint32_t newPosSize = static_cast<uint32_t>(_positions.size());
- _pendingDocs.insert({ _docId,
- { _oldPosSize, newPosSize - _oldPosSize } });
- _docId = 0;
- _oldPosSize = newPosSize;
- }
+ void endDoc();
void addWord(const vespalib::stringref word) {
uint32_t wordRef = saveWord(word);
diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h
index a341e36045e..f984bd8fcbd 100644
--- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h
+++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h
@@ -11,6 +11,7 @@ class OrderedFieldIndexInserter : public IOrderedFieldIndexInserter {
std::stringstream _ss;
bool _first;
bool _verbose;
+ bool _show_cheap_features;
uint32_t _fieldId;
void
@@ -27,6 +28,7 @@ public:
: _ss(),
_first(true),
_verbose(false),
+ _show_cheap_features(false),
_fieldId(0)
{
}
@@ -55,6 +57,11 @@ public:
_ss << "(";
auto wpi = features.word_positions().begin();
bool firstElement = true;
+ if (_show_cheap_features) {
+ _ss << "fl=" << features.field_length() <<
+ ",occs=" << features.num_occs();
+ firstElement = false;
+ }
for (auto &el : features.elements()) {
if (!firstElement) {
_ss << ",";
@@ -70,6 +77,7 @@ public:
}
firstWordPos = false;
_ss << wpi->getWordPos();
+ ++wpi;
}
_ss << "]";
}
@@ -105,6 +113,7 @@ public:
}
void setVerbose() { _verbose = true; } // Turns on the _verbose flag; NOTE(review): its effect on the output is outside this view.
+ void set_show_cheap_features() { _show_cheap_features = true; } // Turns on the "fl=<field_length>,occs=<num_occs>" prefix written for each entry.
};
}