summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@broadpark.no> 2019-04-12 13:59:19 +0200
committer Tor Egge <Tor.Egge@broadpark.no> 2019-04-12 14:07:35 +0200
commit 278ed8c522a77519b8fd942421b7f0958d306725 (patch)
tree 61b564d9bd11fb2f24521e753e89c1715f21f3f8 /searchlib
parent 28a9be2321136a976bdc3bc5b45cef084f81d815 (diff)
Factor out reading of zc4 posting header in unit tests.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp 105
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h 35
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_params.h 29
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp 165
5 files changed, 208 insertions, 127 deletions
diff --git a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
index b21b799e693..104994ad038 100644
--- a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
@@ -18,6 +18,7 @@ vespa_add_library(searchlib_diskindex OBJECT
pagedict4file.cpp
pagedict4randread.cpp
wordnummapper.cpp
+ zc4_posting_header.cpp
zc4_posting_writer.cpp
zc4_posting_writer_base.cpp
zcbuf.cpp
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp
new file mode 100644
index 00000000000..3adb32f8681
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp
@@ -0,0 +1,105 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "zc4_posting_header.h"
+#include "zc4_posting_params.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+
+namespace search::diskindex
+{
+
+/*
+ * Construct an empty header: no "has more" flag, the fixed
+ * K_VALUE_ZCPOSTING_LASTDOCID k value, and every count, size and doc id
+ * set to zero.
+ */
+Zc4PostingHeader::Zc4PostingHeader()
+    : _has_more(false), _doc_id_k(K_VALUE_ZCPOSTING_LASTDOCID),
+      _num_docs(0), _doc_ids_size(0),
+      _l1_skip_size(0), _l2_skip_size(0), _l3_skip_size(0), _l4_skip_size(0),
+      _features_size(0), _last_doc_id(0)
+{
+}
+
+/*
+ * Decode the header of one posting list from decode_context into this
+ * struct and store the advanced decode state back into decode_context.
+ *
+ * Fields read, in order:
+ *  - number of documents (stored minus one),
+ *  - a "has more" bit, only when the number of documents is at least
+ *    params._min_chunk_docs,
+ *  - doc ids size (stored minus one), L1..L4 skip sizes, features size
+ *    (only when params._encode_features) and the last doc id, followed by
+ *    (oPreRead & 7) padding bits. These are absent, and reported as zero,
+ *    when the number of documents is below params._min_skip_docs and the
+ *    previous header read into this object did not have "has more" set.
+ *
+ * _has_more is updated last, so while decoding it still holds the value
+ * left by the previous read() on this object (false after construction).
+ */
+template <bool bigEndian>
+void
+Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params)
+{
+    using EC = bitcompression::FeatureEncodeContext<bigEndian>;
+    UC64_DECODECONTEXT_CONSTRUCTOR(o, decode_context._);
+    // Used by name inside the UC64_* macros: val64 receives each decoded
+    // value, length is the bit count consumed by UC64_READBITS_NS.
+    uint32_t length;
+    uint64_t val64;
+
+    UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+    _num_docs = static_cast<uint32_t>(val64) + 1;
+    bool has_more = false;
+    if (__builtin_expect(_num_docs >= params._min_chunk_docs, false)) {
+        // One extra bit follows: taken from the top of oVal for big endian,
+        // from the bottom for little endian.
+        if (bigEndian) {
+            has_more = static_cast<int64_t>(oVal) < 0;
+            oVal <<= 1;
+            length = 1;
+        } else {
+            has_more = (oVal & 1) != 0;
+            oVal >>= 1;
+            length = 1;
+        }
+        UC64_READBITS_NS(o, EC);
+    }
+    if (params._dynamic_k) {
+        // A doc count of 1 is used for the k calculation whenever this header
+        // or the previous one had the "has more" bit set.
+        _doc_id_k = EC::calcDocIdK((_has_more || has_more) ? 1 : _num_docs, params._doc_id_limit);
+    } else {
+        _doc_id_k = K_VALUE_ZCPOSTING_LASTDOCID;
+    }
+    if (_num_docs < params._min_skip_docs && !_has_more) {
+        // Short posting list: no size fields or last doc id in the header.
+        _doc_ids_size = 0;
+        _l1_skip_size = 0;
+        _l2_skip_size = 0;
+        _l3_skip_size = 0;
+        _l4_skip_size = 0;
+        _features_size = 0;
+        _last_doc_id = 0;
+    } else {
+        UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
+        _doc_ids_size = val64 + 1;
+        UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
+        _l1_skip_size = val64;
+        // A skip level is only present in the stream when the level below it
+        // is non-empty. Clear the upper levels first so that a reused header
+        // never keeps (and never branches on) sizes from an earlier read().
+        _l2_skip_size = 0;
+        _l3_skip_size = 0;
+        _l4_skip_size = 0;
+        if (_l1_skip_size != 0) {
+            UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
+            _l2_skip_size = val64;
+        }
+        if (_l2_skip_size != 0) {
+            UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
+            _l3_skip_size = val64;
+        }
+        if (_l3_skip_size != 0) {
+            UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
+            _l4_skip_size = val64;
+        }
+        if (params._encode_features) {
+            UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC);
+            _features_size = val64;
+        } else {
+            _features_size = 0;
+        }
+        UC64_DECODEEXPGOLOMB_NS(o, _doc_id_k, EC);
+        // The stream stores the distance from the highest possible doc id.
+        _last_doc_id = params._doc_id_limit - 1 - val64;
+        // Skip the padding bits after the header; the iterators using this
+        // reader assert that the bit offset is byte aligned afterwards.
+        uint64_t bytePad = oPreRead & 7;
+        if (bytePad > 0) {
+            length = bytePad;
+            UC64_READBITS_NS(o, EC);
+        }
+    }
+    UC64_DECODECONTEXT_STORE(o, decode_context._);
+    _has_more = has_more;
+}
+
+// Explicit instantiations for little endian (false) and big endian (true);
+// the header only declares read(), so these provide the definitions.
+template void Zc4PostingHeader::read<false>(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params);
+
+template void Zc4PostingHeader::read<true>(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params);
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h
new file mode 100644
index 00000000000..7382f59d176
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h
@@ -0,0 +1,35 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace search::bitcompression { class DecodeContext64Base; }
+
+namespace search::diskindex {
+
+struct Zc4PostingParams;
+
+/*
+ * Struct containing the decoded header for a word.
+ *
+ * All members are filled in by read(), which decodes them from a bit
+ * compressed stream according to the supplied Zc4PostingParams. The
+ * bigEndian template argument of read() selects the bit order; the
+ * definition and its two explicit instantiations are in
+ * zc4_posting_header.cpp.
+ */
+struct Zc4PostingHeader {
+ bool _has_more; // "has more" bit from the latest read(); false when the bit was not present
+ uint32_t _doc_id_k; // k parameter used when decoding the last doc id
+ uint32_t _num_docs; // decoded number of documents
+ uint32_t _doc_ids_size; // decoded doc ids size; 0 when the size fields are absent
+ uint32_t _l1_skip_size; // decoded L1 skip size
+ uint32_t _l2_skip_size; // decoded L2 skip size; only stored when the L1 size is non-zero
+ uint32_t _l3_skip_size; // decoded L3 skip size; only stored when the L2 size is non-zero
+ uint32_t _l4_skip_size; // decoded L4 skip size; only stored when the L3 size is non-zero
+ uint64_t _features_size; // decoded features size; 0 when features are not encoded
+ uint32_t _last_doc_id; // doc id limit - 1 - decoded value; 0 when the size fields are absent
+
+ Zc4PostingHeader(); // zeroes all members and selects the fixed last doc id k value
+
+ template <bool bigEndian>
+ void
+ read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_params.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_params.h
new file mode 100644
index 00000000000..ea4cc6f58a6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_params.h
@@ -0,0 +1,29 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace search::diskindex {
+
+/*
+ * Struct containing parameters for posting list.
+ *
+ * Zc4PostingHeader::read() consults these to decide which header fields
+ * are present in the stream and how the last doc id is decoded.
+ */
+struct Zc4PostingParams {
+ uint32_t _min_skip_docs; // size fields and last doc id are read when the doc count reaches this (or a "has more" bit is pending)
+ uint32_t _min_chunk_docs; // the "has more" bit is read only when the doc count reaches this
+ uint32_t _doc_id_limit; // used to derive the last doc id and, with dynamic k, the k value
+ bool _dynamic_k; // true: k comes from calcDocIdK(); false: K_VALUE_ZCPOSTING_LASTDOCID
+ bool _encode_features; // true: a features size field is present in the header
+
+ Zc4PostingParams(uint32_t min_skip_docs, uint32_t min_chunk_docs, uint32_t doc_id_limit, bool dynamic_k, bool encode_features) // stores each argument in the matching member
+ : _min_skip_docs(min_skip_docs),
+ _min_chunk_docs(min_chunk_docs),
+ _doc_id_limit(doc_id_limit),
+ _dynamic_k(dynamic_k),
+ _encode_features(encode_features)
+ {
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
index 33819d4f7cb..3d4567ed2ab 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
@@ -4,6 +4,8 @@
#include "fpfactory.h"
#include <vespa/searchlib/diskindex/zcposocciterators.h>
#include <vespa/searchlib/diskindex/zc4_posting_writer.h>
+#include <vespa/searchlib/diskindex/zc4_posting_header.h>
+#include <vespa/searchlib/diskindex/zc4_posting_params.h>
using search::fef::TermFieldMatchData;
using search::fef::TermFieldMatchDataArray;
@@ -13,6 +15,8 @@ using search::index::PostingListCounts;
using search::index::PostingListParams;
using search::index::DocIdAndFeatures;
using search::index::DocIdAndPosOccFeatures;
+using search::bitcompression::DecodeContext64;
+using search::bitcompression::DecodeContext64Base;
using search::bitcompression::PosOccFieldParams;
using search::bitcompression::EGPosOccEncodeContext;
using search::bitcompression::EG2PosOccEncodeContext;
@@ -200,38 +204,18 @@ void
FakeZcFilterOcc::read_header(bool doFeatures, bool dynamicK, uint32_t min_skip_docs, uint32_t min_chunk_docs)
{
// read back word header to get skip sizes
- using EC = FeatureEncodeContext<bigEndian>;
- UC64_DECODECONTEXT(o);
- uint32_t length;
- uint64_t val64;
- UC64_SETUPBITS_NS(o, _compressed.first, 0, EC);
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
- assert(static_cast<uint32_t>(val64) + 1 == _hitDocs);
- assert(_hitDocs >= min_skip_docs);
- assert(_hitDocs < min_chunk_docs);
- uint32_t docIdK = dynamicK ? EC::calcDocIdK(_hitDocs, _docIdLimit) : K_VALUE_ZCPOSTING_LASTDOCID;
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
- _docIdsSize = val64 + 1;
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
- _l1SkipSize = val64;
- if (_l1SkipSize != 0) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
- _l2SkipSize = val64;
- }
- if (_l2SkipSize != 0) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
- _l3SkipSize = val64;
- }
- if (_l3SkipSize != 0) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
- _l4SkipSize = val64;
- }
- if (doFeatures) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC);
- _featuresSize = val64;
- }
- UC64_DECODEEXPGOLOMB_NS(o, docIdK, EC);
- assert(_lastDocId == _docIdLimit - 1 - val64);
+ DecodeContext64<bigEndian> decode_context;
+ decode_context.setPosition({ _compressed.first, 0 });
+ Zc4PostingParams params(min_skip_docs, min_chunk_docs, _docIdLimit, dynamicK, doFeatures);
+ Zc4PostingHeader header;
+ header.read<bigEndian>(decode_context, params);
+ _docIdsSize = header._doc_ids_size;
+ _l1SkipSize = header._l1_skip_size;
+ _l2SkipSize = header._l2_skip_size;
+ _l3SkipSize = header._l3_skip_size;
+ _l4SkipSize = header._l4_skip_size;
+ _featuresSize = header._features_size;
+ assert(_lastDocId == header._last_doc_id);
}
@@ -383,54 +367,17 @@ FakeFilterOccZCArrayIterator::initRange(uint32_t begin, uint32_t end)
{
queryeval::RankedSearchIteratorBase::initRange(begin, end);
DecodeContext &d = _decodeContext;
- typedef EncodeContext EC;
- UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
- uint32_t length;
- uint64_t val64;
-
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
- uint32_t numDocs = static_cast<uint32_t>(val64) + 1;
-
- uint32_t docIdK = EC::calcDocIdK(numDocs, _docIdLimit);
-
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
- uint32_t docIdsSize = val64 + 1;
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
- uint32_t l1SkipSize = val64;
- uint32_t l2SkipSize = 0;
- if (l1SkipSize != 0) {
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
- l2SkipSize = val64;
- }
- uint32_t l3SkipSize = 0;
- if (l2SkipSize != 0) {
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
- l3SkipSize = val64;
- }
- uint32_t l4SkipSize = 0;
- if (l3SkipSize != 0) {
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
- l4SkipSize = val64;
- }
- // Feature size would be here
- UC64BE_DECODEEXPGOLOMB_NS(o, docIdK, EC);
- _lastDocId = _docIdLimit - 1 - val64;
- UC64_DECODECONTEXT_STORE(o, d._);
- uint64_t bytePad = oPreRead & 7;
- if (bytePad > 0) {
- length = bytePad;
- oVal <<= length;
- UC64BE_READBITS_NS(o, EC);
- }
- UC64_DECODECONTEXT_STORE(o, d._);
+ Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false);
+ Zc4PostingHeader header;
+ header.read<true>(d, params);
assert((d.getBitOffset() & 7) == 0);
const uint8_t *bcompr = d.getByteCompr();
_valI = bcompr;
- bcompr += docIdsSize;
- bcompr += l1SkipSize;
- bcompr += l2SkipSize;
- bcompr += l3SkipSize;
- bcompr += l4SkipSize;
+ bcompr += header._doc_ids_size;
+ bcompr += header._l1_skip_size;
+ bcompr += header._l2_skip_size;
+ bcompr += header._l3_skip_size;
+ bcompr += header._l4_skip_size,
d.setByteCompr(bcompr);
uint32_t oDocId;
ZCDECODE(_valI, oDocId = 1 +);
@@ -439,7 +386,7 @@ FakeFilterOccZCArrayIterator::initRange(uint32_t begin, uint32_t end)
oDocId);
#endif
setDocId(oDocId);
- _residue = numDocs;
+ _residue = header._num_docs;
}
@@ -641,79 +588,43 @@ initRange(uint32_t begin, uint32_t end)
{
queryeval::RankedSearchIteratorBase::initRange(begin, end);
DecodeContext &d = _decodeContext;
- typedef EncodeContext EC;
- UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
- uint32_t length;
- uint64_t val64;
-
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
- uint32_t numDocs = static_cast<uint32_t>(val64) + 1;
-
- uint32_t docIdK = EC::calcDocIdK(numDocs, _docIdLimit);
-
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
- uint32_t docIdsSize = val64 + 1;
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
- uint32_t l1SkipSize = val64;
- uint32_t l2SkipSize = 0;
- if (l1SkipSize != 0) {
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
- l2SkipSize = val64;
- }
- uint32_t l3SkipSize = 0;
- if (l2SkipSize != 0) {
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
- l3SkipSize = val64;
- }
- uint32_t l4SkipSize = 0;
- if (l3SkipSize != 0) {
- UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
- l4SkipSize = val64;
- }
- // Feature size would be here
- UC64BE_DECODEEXPGOLOMB_NS(o, docIdK, EC);
- _lastDocId = _docIdLimit - 1 - val64;
- UC64_DECODECONTEXT_STORE(o, d._);
- uint64_t bytePad = oPreRead & 7;
- if (bytePad > 0) {
- length = bytePad;
- oVal <<= length;
- UC64BE_READBITS_NS(o, EC);
- }
- UC64_DECODECONTEXT_STORE(o, d._);
+ Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false);
+ Zc4PostingHeader header;
+ header.read<true>(d, params);
+ _lastDocId = header._last_doc_id;
assert((d.getBitOffset() & 7) == 0);
const uint8_t *bcompr = d.getByteCompr();
_valIBase = _valI = bcompr;
_l1SkipDocIdPos = _l2SkipDocIdPos = bcompr;
_l3SkipDocIdPos = _l4SkipDocIdPos = bcompr;
- bcompr += docIdsSize;
- if (l1SkipSize != 0) {
+ bcompr += header._doc_ids_size;
+ if (header._l1_skip_size != 0) {
_l1SkipValIBase = _l1SkipValI = bcompr;
_l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = bcompr;
- bcompr += l1SkipSize;
+ bcompr += header._l1_skip_size;
} else {
_l1SkipValIBase = _l1SkipValI = NULL;
_l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = NULL;
}
- if (l2SkipSize != 0) {
+ if (header._l2_skip_size != 0) {
_l2SkipValIBase = _l2SkipValI = bcompr;
_l3SkipL2SkipPos = _l4SkipL2SkipPos = bcompr;
- bcompr += l2SkipSize;
+ bcompr += header._l2_skip_size;
} else {
_l2SkipValIBase = _l2SkipValI = NULL;
_l3SkipL2SkipPos = _l4SkipL2SkipPos = NULL;
}
- if (l3SkipSize != 0) {
+ if (header._l3_skip_size != 0) {
_l3SkipValIBase = _l3SkipValI = bcompr;
_l4SkipL3SkipPos = bcompr;
- bcompr += l3SkipSize;
+ bcompr += header._l3_skip_size;
} else {
_l3SkipValIBase = _l3SkipValI = NULL;
_l4SkipL3SkipPos = NULL;
}
- if (l4SkipSize != 0) {
+ if (header._l4_skip_size != 0) {
_l4SkipValI = bcompr;
- bcompr += l4SkipSize;
+ bcompr += header._l4_skip_size;
} else {
_l4SkipValI = NULL;
}