about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-05-10 08:49:15 +0200
committer GitHub <noreply@github.com> 2019-05-10 08:49:15 +0200
commit 1e98247ac92f391bf8af18627354f2374255f32b (patch)
tree c579f5f431583ea8a1ddc9bedcc73cdd1aceed6c
parent 04ff496cb0620d0b4b4e48cc5a4d96da521bb1b8 (diff)
parent 1b01a22accb46f210ca26a551177499cd4fee159 (diff)
Merge pull request #9349 from vespa-engine/toregge/extend-field-writer-test-to-check-encoding-of-cheap-features
Extend field writer test to check encoding of cheap features.
-rw-r--r-- searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp | 2
-rw-r--r-- searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp | 90
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp | 6
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fieldwriter.h | 4
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fusion.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp | 5
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakeword.h | 1
8 files changed, 69 insertions, 43 deletions
diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
index fab2ed734cd..c562fea69c1 100644
--- a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
@@ -47,7 +47,7 @@ FieldWriterWrapper::open(const std::string &path,
const common::FileHeaderContext &fileHeaderContext)
{
vespalib::mkdir(path, false);
- return _writer.open(path, 64, 10000, false, schema, indexId, tuneFileWrite, fileHeaderContext);
+ return _writer.open(path, 64, 10000, false, false, schema, indexId, tuneFileWrite, fileHeaderContext);
}
FieldWriterWrapper &
diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
index 2d5d3d41a3c..6d4f483fecc 100644
--- a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
+++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
@@ -79,6 +79,7 @@ void enableSkipChunks()
minChunkDocs = 9000; // Unrealistic low for testing
}
+const char *bool_to_str(bool val) { return (val ? "true" : "false"); }
vespalib::string
makeWordString(uint64_t wordNum)
@@ -146,6 +147,7 @@ public:
std::unique_ptr<FieldWriter> _fieldWriter;
private:
bool _dynamicK;
+ bool _encode_cheap_features;
uint32_t _numWordIds;
uint32_t _docIdLimit;
vespalib::string _namepref;
@@ -155,9 +157,10 @@ private:
public:
WrappedFieldWriter(const vespalib::string &namepref,
- bool dynamicK,
- uint32_t numWordIds,
- uint32_t docIdLimit);
+ bool dynamicK,
+ bool encode_cheap_features,
+ uint32_t numWordIds,
+ uint32_t docIdLimit);
~WrappedFieldWriter();
void open();
@@ -168,10 +171,12 @@ WrappedFieldWriter::~WrappedFieldWriter() {}
WrappedFieldWriter::WrappedFieldWriter(const vespalib::string &namepref,
bool dynamicK,
+ bool encode_cheap_features,
uint32_t numWordIds,
uint32_t docIdLimit)
: _fieldWriter(),
_dynamicK(dynamicK),
+ _encode_cheap_features(encode_cheap_features),
_numWordIds(numWordIds),
_docIdLimit(docIdLimit),
_namepref(dirprefix + namepref),
@@ -192,8 +197,9 @@ WrappedFieldWriter::open()
fileHeaderContext.disableFileName();
_fieldWriter = std::make_unique<FieldWriter>(_docIdLimit, _numWordIds);
_fieldWriter->open(_namepref,
- minSkipDocs, minChunkDocs, _dynamicK, _schema,
- _indexId,
+ minSkipDocs, minChunkDocs,
+ _dynamicK, _encode_cheap_features,
+ _schema, _indexId,
tuneFileWrite, fileHeaderContext);
}
@@ -349,7 +355,7 @@ void
writeField(FakeWordSet &wordSet,
uint32_t docIdLimit,
const std::string &namepref,
- bool dynamicK)
+ bool dynamicK, bool encode_cheap_features)
{
const char *dynamicKStr = dynamicK ? "true" : "false";
@@ -359,14 +365,15 @@ writeField(FakeWordSet &wordSet,
LOG(info,
"enter writeField, "
- "namepref=%s, dynamicK=%s",
+ "namepref=%s, dynamicK=%s, encode_cheap_features=%s",
namepref.c_str(),
- dynamicKStr);
+ dynamicKStr,
+ bool_to_str(encode_cheap_features));
tv.SetNow();
before = tv.Secs();
WrappedFieldWriter ostate(namepref,
- dynamicK,
- wordSet.getNumWords(), docIdLimit);
+ dynamicK, encode_cheap_features,
+ wordSet.getNumWords(), docIdLimit);
FieldWriter::remove(dirprefix + namepref);
ostate.open();
@@ -384,10 +391,11 @@ writeField(FakeWordSet &wordSet,
after = tv.Secs();
LOG(info,
"leave writeField, "
- "namepref=%s, dynamicK=%s"
+ "namepref=%s, dynamicK=%s, encode_cheap_features=%s"
" elapsed=%10.6f",
namepref.c_str(),
dynamicKStr,
+ bool_to_str(encode_cheap_features),
after - before);
}
@@ -397,6 +405,7 @@ readField(FakeWordSet &wordSet,
uint32_t docIdLimit,
const std::string &namepref,
bool dynamicK,
+ bool decode_cheap_features,
bool verbose)
{
const char *dynamicKStr = dynamicK ? "true" : "false";
@@ -408,9 +417,10 @@ readField(FakeWordSet &wordSet,
docIdLimit);
LOG(info,
"enter readField, "
- "namepref=%s, dynamicK=%s",
+ "namepref=%s, dynamicK=%s, decode_cheap_features=%s",
namepref.c_str(),
- dynamicKStr);
+ dynamicKStr,
+ bool_to_str(decode_cheap_features));
tv.SetNow();
before = tv.Secs();
istate.open();
@@ -425,7 +435,7 @@ readField(FakeWordSet &wordSet,
TermFieldMatchDataArray tfmda;
tfmda.add(&mdfield1);
- word->validate(*istate._fieldReader, wordNum, tfmda, verbose);
+ word->validate(*istate._fieldReader, wordNum, tfmda, decode_cheap_features, verbose);
++wordNum;
}
}
@@ -435,10 +445,11 @@ readField(FakeWordSet &wordSet,
after = tv.Secs();
LOG(info,
"leave readField, "
- "namepref=%s, dynamicK=%s"
+ "namepref=%s, dynamicK=%s, decode_cheap_features=%s"
" elapsed=%10.6f",
namepref.c_str(),
dynamicKStr,
+ bool_to_str(decode_cheap_features),
after - before);
}
@@ -555,7 +566,8 @@ fusionField(uint32_t numWordIds,
const vespalib::string &ipref,
const vespalib::string &opref,
bool doRaw,
- bool dynamicK)
+ bool dynamicK,
+ bool encode_cheap_features)
{
const char *rawStr = doRaw ? "true" : "false";
const char *dynamicKStr = dynamicK ? "true" : "false";
@@ -564,18 +576,18 @@ fusionField(uint32_t numWordIds,
LOG(info,
"enter fusionField, ipref=%s, opref=%s,"
" raw=%s,"
- " dynamicK=%s",
+ " dynamicK=%s, encode_cheap_features=%s",
ipref.c_str(),
opref.c_str(),
rawStr,
- dynamicKStr);
+ dynamicKStr, bool_to_str(encode_cheap_features));
FastOS_Time tv;
double before;
double after;
WrappedFieldWriter ostate(opref,
- dynamicK,
- numWordIds, docIdLimit);
+ dynamicK, encode_cheap_features,
+ numWordIds, docIdLimit);
WrappedFieldReader istate(ipref, numWordIds, docIdLimit);
tv.SetNow();
@@ -603,12 +615,12 @@ fusionField(uint32_t numWordIds,
after = tv.Secs();
LOG(info,
"leave fusionField, ipref=%s, opref=%s,"
- " raw=%s dynamicK=%s, "
+ " raw=%s dynamicK=%s, encode_cheap_features=%s,"
" elapsed=%10.6f",
ipref.c_str(),
opref.c_str(),
rawStr,
- dynamicKStr,
+ dynamicKStr, bool_to_str(encode_cheap_features),
after - before);
}
@@ -617,19 +629,20 @@ void
testFieldWriterVariant(FakeWordSet &wordSet, uint32_t doc_id_limit,
const vespalib::string &file_name_prefix,
bool dynamic_k,
+ bool encode_cheap_features,
bool verbose)
{
- writeField(wordSet, doc_id_limit, file_name_prefix, dynamic_k);
- readField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, verbose);
+ writeField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, encode_cheap_features);
+ readField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, encode_cheap_features, verbose);
randReadField(wordSet, file_name_prefix, dynamic_k, verbose);
fusionField(wordSet.getNumWords(),
doc_id_limit,
file_name_prefix, file_name_prefix + "x",
- false, dynamic_k);
+ false, dynamic_k, encode_cheap_features);
fusionField(wordSet.getNumWords(),
doc_id_limit,
file_name_prefix, file_name_prefix + "xx",
- true, dynamic_k);
+ true, dynamic_k, encode_cheap_features);
check_fusion(file_name_prefix);
remove_field(file_name_prefix);
}
@@ -639,14 +652,15 @@ testFieldWriterVariants(FakeWordSet &wordSet,
uint32_t docIdLimit, bool verbose)
{
disableSkip();
- testFieldWriterVariant(wordSet, docIdLimit, "new4", true, verbose);
- testFieldWriterVariant(wordSet, docIdLimit, "new5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "new4", true, false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "new5", false, false, verbose);
enableSkip();
- testFieldWriterVariant(wordSet, docIdLimit, "newskip4", true, verbose);
- testFieldWriterVariant(wordSet, docIdLimit, "newskip5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newskip4", true, false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newskip5", false, false, verbose);
enableSkipChunks();
- testFieldWriterVariant(wordSet, docIdLimit, "newchunk4", true, verbose);
- testFieldWriterVariant(wordSet, docIdLimit, "newchunk5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newchunk4", true, false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newchunk5", false, false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newchunkcf4", true, true, verbose);
}
@@ -655,14 +669,14 @@ testFieldWriterVariantsWithHighLids(FakeWordSet &wordSet, uint32_t docIdLimit,
bool verbose)
{
disableSkip();
- testFieldWriterVariant(wordSet, docIdLimit, "hlid4", true, verbose);
- testFieldWriterVariant(wordSet, docIdLimit, "hlid5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlid4", true, false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlid5", false, false, verbose);
enableSkip();
- testFieldWriterVariant(wordSet, docIdLimit, "hlidskip4", true, verbose);
- testFieldWriterVariant(wordSet, docIdLimit, "hlidskip5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidskip4", true, false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidskip5", false, false, verbose);
enableSkipChunks();
- testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk4", true, verbose);
- testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk4", true, false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk5", false, false, verbose);
}
int
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
index 7c52727f90e..bd30074836e 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
@@ -37,6 +37,7 @@ FieldWriter::open(const vespalib::string &prefix,
uint32_t minSkipDocs,
uint32_t minChunkDocs,
bool dynamicKPosOccFormat,
+ bool encode_cheap_features,
const Schema &schema,
const uint32_t indexId,
const TuneFileSeqWrite &tuneFileWrite,
@@ -62,7 +63,10 @@ FieldWriter::open(const vespalib::string &prefix,
countParams.set("minChunkDocs", minChunkDocs);
params.set("minChunkDocs", minChunkDocs);
}
-
+ if (encode_cheap_features) {
+ params.set("cheap_features", encode_cheap_features);
+ }
+
_dictFile = std::make_unique<PageDict4FileSeqWrite>();
_dictFile->setParams(countParams);
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
index e5aa9788071..c71bc4f4132 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
@@ -68,7 +68,9 @@ public:
uint64_t getSparseWordNum() const { return _wordNum; }
bool open(const vespalib::string &prefix, uint32_t minSkipDocs, uint32_t minChunkDocs,
- bool dynamicKPosOccFormat, const Schema &schema, uint32_t indexId,
+ bool dynamicKPosOccFormat,
+ bool encode_cheap_features,
+ const Schema &schema, uint32_t indexId,
const TuneFileSeqWrite &tuneFileWrite,
const search::common::FileHeaderContext &fileHeaderContext);
diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
index ed311b682e6..6d95f87b6a6 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
@@ -320,7 +320,7 @@ Fusion::openFieldWriter(const SchemaUtil::IndexIterator &index,
if (!writer.open(dir + "/",
64,
262144,
- _dynamicKPosIndexFormat,
+ _dynamicKPosIndexFormat, false,
index.getSchema(),
index.getIndex(),
_tuneFileIndexing._write,
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
index 42f6971e53f..7a6e24f2529 100644
--- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
@@ -97,7 +97,7 @@ FileHandle::open(vespalib::stringref dir,
_fieldWriter = std::make_shared<FieldWriter>(docIdLimit, numWordIds);
- if (!_fieldWriter->open(dir + "/", 64, 262144u, false,
+ if (!_fieldWriter->open(dir + "/", 64, 262144u, false, false,
index.getSchema(), index.getIndex(),
tuneFileWrite, fileHeaderContext)) {
LOG(error, "Could not open term writer %s for write (%s)",
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
index 601451dc6c4..5076a88603a 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
@@ -562,6 +562,7 @@ bool
FakeWord::validate(FieldReader &fieldReader,
uint32_t wordNum,
const fef::TermFieldMatchDataArray &matchData,
+ bool decode_cheap_features,
bool verbose) const
{
uint32_t docId = 0;
@@ -593,6 +594,10 @@ FakeWord::validate(FieldReader &fieldReader,
docId = features.doc_id();
assert(d != de);
assert(d->_docId == docId);
+ if (decode_cheap_features) {
+ assert(d->_collapsedDocWordFeatures._field_len == features.field_length());
+ assert(d->_collapsedDocWordFeatures._num_occs == features.num_occs());
+ }
if (matchData.valid()) {
#ifdef notyet
unpres = features.unpack(matchData);
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
index 106c0c0d9ab..48aaf9f41ed 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
@@ -253,6 +253,7 @@ public:
validate(search::diskindex::FieldReader &fieldReader,
uint32_t wordNum,
const fef::TermFieldMatchDataArray &matchData,
+ bool decode_cheap_features,
bool verbose) const;
void validate(const std::vector<uint32_t> &docIds) const;