diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-05-09 12:52:57 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2019-05-09 12:52:57 +0200 |
commit | b2cbefcb3eb68ad3985514e7451edade90ecad62 (patch) | |
tree | 80f4280b479dcdb63ad897a84cfd155ab9816ef5 /searchlib | |
parent | c3667718a63a8703bf62833dcb92b7ad5422d0cc (diff) |
Simplify field writer unit test.
Diffstat (limited to 'searchlib')
3 files changed, 108 insertions, 159 deletions
diff --git a/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt index c2dcb83a423..b8666524c51 100644 --- a/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt +++ b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt @@ -6,5 +6,4 @@ vespa_add_executable(searchlib_fieldwriter_test_app TEST searchlib_test searchlib ) -vespa_add_test(NAME searchlib_fieldwriter_test_app COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/runtests.sh - DEPENDS searchlib_fieldwriter_test_app) +vespa_add_test(NAME searchlib_fieldwriter_test_app COMMAND searchlib_fieldwriter_test_app) diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp index 46ee5e15757..84a57e8aca7 100644 --- a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp +++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp @@ -19,6 +19,7 @@ #include <vespa/searchlib/diskindex/pagedict4randread.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/fastos/time.h> +#include <openssl/sha.h> #include <vespa/fastos/app.h> #include <vespa/log/log.h> LOG_SETUP("fieldwriter_test"); @@ -46,6 +47,7 @@ using search::index::SchemaUtil; using search::index::schema::CollectionType; using search::index::schema::DataType; using search::queryeval::SearchIterator; +using vespalib::alloc::Alloc; using namespace search::index; @@ -273,6 +275,76 @@ WrappedFieldReader::close() } +class FileChecksum +{ + unsigned char _digest[SHA256_DIGEST_LENGTH]; + +public: + FileChecksum(const vespalib::string &file_name); + bool operator==(const FileChecksum &rhs) const { + return (memcmp(_digest, rhs._digest, SHA256_DIGEST_LENGTH) == 0); + } +}; + + +FileChecksum::FileChecksum(const vespalib::string &file_name) +{ + SHA256_CTX c; + FastOS_File f; + Alloc buf = Alloc::alloc(65536); + vespalib::string full_file_name(dirprefix + file_name); + bool openres = f.OpenReadOnly(full_file_name.c_str()); + if (!openres) { + LOG(error, "Could not open %s for sha256 checksum", full_file_name.c_str()); + LOG_ABORT("should not be reached"); + } + int64_t flen = f.GetSize(); + int64_t remainder = flen; + SHA256_Init(&c); + while (remainder > 0) { + int64_t thistime = + std::min(remainder, static_cast<int64_t>(buf.size())); + f.ReadBuf(buf.get(), thistime); + SHA256_Update(&c, buf.get(), thistime); + remainder -= thistime; + } + f.Close(); + SHA256_Final(_digest, &c); +} + +void +compare_files(const vespalib::string &file_name_prefix, const vespalib::string &file_name_suffix) +{ + FileChecksum baseline_checksum(file_name_prefix + file_name_suffix); + FileChecksum cooked_fusion_checksum(file_name_prefix + "x" + file_name_suffix); + FileChecksum raw_fusion_checksum(file_name_prefix + "xx" + file_name_suffix); + assert(baseline_checksum == cooked_fusion_checksum); + assert(baseline_checksum == raw_fusion_checksum); +} + +std::vector<vespalib::string> suffixes = { + "boolocc.bdat", "boolocc.idx", + "posocc.dat.compressed", + "dictionary.pdat", "dictionary.spdat", "dictionary.ssdat" +}; + +void +check_fusion(const vespalib::string file_name_prefix) +{ + for (const auto &file_name_suffix : suffixes) { + compare_files(file_name_prefix, file_name_suffix); + } +} + +void +remove_field(const vespalib::string &file_name_prefix) +{ + vespalib::string remove_prefix(dirprefix + file_name_prefix); + FieldWriter::remove(remove_prefix); + FieldWriter::remove(remove_prefix + "x"); + FieldWriter::remove(remove_prefix + "xx"); +} + void writeField(FakeWordSet &wordSet, uint32_t docIdLimit, @@ -295,7 +367,7 @@ writeField(FakeWordSet &wordSet, WrappedFieldWriter ostate(namepref, dynamicK, wordSet.getNumWords(), docIdLimit); - FieldWriter::remove(namepref); + FieldWriter::remove(dirprefix + namepref); ostate.open(); unsigned int wordNum = 1; @@ -542,83 +614,39 @@ fusionField(uint32_t numWordIds, void +testFieldWriterVariant(FakeWordSet &wordSet, uint32_t doc_id_limit, + const vespalib::string &file_name_prefix, + bool dynamic_k, + bool verbose) +{ + writeField(wordSet, doc_id_limit, file_name_prefix, dynamic_k); + readField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, verbose); + randReadField(wordSet, file_name_prefix, dynamic_k, verbose); + fusionField(wordSet.getNumWords(), + doc_id_limit, + file_name_prefix, file_name_prefix + "x", + false, dynamic_k); + fusionField(wordSet.getNumWords(), + doc_id_limit, + file_name_prefix, file_name_prefix + "xx", + true, dynamic_k); + check_fusion(file_name_prefix); + remove_field(file_name_prefix); +} + +void testFieldWriterVariants(FakeWordSet &wordSet, uint32_t docIdLimit, bool verbose) { disableSkip(); - writeField(wordSet, docIdLimit, "new4", true); - readField(wordSet, docIdLimit, "new4", true, verbose); - readField(wordSet, docIdLimit, "new4", true, verbose); - writeField(wordSet, docIdLimit, "new5", false); - readField(wordSet, docIdLimit, "new5", false, verbose); - enableSkip(); - writeField(wordSet, docIdLimit, "newskip4", true); - readField(wordSet, docIdLimit, "newskip4", true, verbose); - writeField(wordSet, docIdLimit, "newskip5", false); - readField(wordSet, docIdLimit, "newskip5", false, verbose); - enableSkipChunks(); - writeField(wordSet, docIdLimit, "newchunk4", true); - readField(wordSet, docIdLimit, "newchunk4", true, verbose); - writeField(wordSet, docIdLimit, "newchunk5", false); - readField(wordSet, docIdLimit, - "newchunk5",false, verbose); - disableSkip(); - fusionField(wordSet.getNumWords(), - docIdLimit, - "new4", "new4x", - false, true); - fusionField(wordSet.getNumWords(), - docIdLimit, - "new4", "new4xx", - true, true); - fusionField(wordSet.getNumWords(), - docIdLimit, - "new5", "new5x", - false, false); - fusionField(wordSet.getNumWords(), - docIdLimit, - "new5", "new5xx", - true, false); - randReadField(wordSet, "new4", true, verbose); - randReadField(wordSet, "new5", false, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "new4", true, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "new5", false, verbose); enableSkip(); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newskip4", "newskip4x", - false, true); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newskip4", "newskip4xx", - true, true); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newskip5", "newskip5x", - false, false); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newskip5", "newskip5xx", - true, false); - randReadField(wordSet, "newskip4", true, verbose); - randReadField(wordSet, "newskip5", false, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "newskip4", true, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "newskip5", false, verbose); enableSkipChunks(); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newchunk4", "newchunk4x", - false, true); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newchunk4", "newchunk4xx", - true, true); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newchunk5", "newchunk5x", - false, false); - fusionField(wordSet.getNumWords(), - docIdLimit, - "newchunk5", "newchunk5xx", - true, false); - randReadField(wordSet, "newchunk4", true, verbose); - randReadField(wordSet, "newchunk5", false, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "newchunk4", true, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "newchunk5", false, verbose); } @@ -627,26 +655,14 @@ testFieldWriterVariantsWithHighLids(FakeWordSet &wordSet, uint32_t docIdLimit, bool verbose) { disableSkip(); - writeField(wordSet, docIdLimit, "hlid4", true); - readField(wordSet, docIdLimit, "hlid4", true, verbose); - writeField(wordSet, docIdLimit, "hlid5", false); - readField(wordSet, docIdLimit, "hlid5", false, verbose); - randReadField(wordSet, "hlid4", true, verbose); - randReadField(wordSet, "hlid5", false, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "hlid4", true, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "hlid5", false, verbose); enableSkip(); - writeField(wordSet, docIdLimit, "hlidskip4", true); - readField(wordSet, docIdLimit, "hlidskip4", true, verbose); - writeField(wordSet, docIdLimit, "hlidskip5", false); - readField(wordSet, docIdLimit, "hlidskip5", false, verbose); - randReadField(wordSet, "hlidskip4", true, verbose); - randReadField(wordSet, "hlidskip5", false, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "hlidskip4", true, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "hlidskip5", false, verbose); enableSkipChunks(); - writeField(wordSet, docIdLimit, "hlidchunk4", true); - readField(wordSet, docIdLimit, "hlidchunk4", true, verbose); - writeField(wordSet, docIdLimit, "hlidchunk5", false); - readField(wordSet, docIdLimit, "hlidchunk5", false, verbose); - randReadField(wordSet, "hlidchunk4", true, verbose); - randReadField(wordSet, "hlidchunk5", false, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk4", true, verbose); + testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk5", false, verbose); } int @@ -700,6 +716,7 @@ FieldWriterTest::Main() _wordSet2.addDocIdBias(docIdBias); // Large skip numbers testFieldWriterVariantsWithHighLids(_wordSet2, _numDocs + docIdBias, _verbose); + vespalib::rmdir("index", true); return 0; } diff --git a/searchlib/src/tests/diskindex/fieldwriter/runtests.sh b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh deleted file mode 100755 index 0574819ca24..00000000000 --- a/searchlib/src/tests/diskindex/fieldwriter/runtests.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -set -e - -rm -f new* chkpt* -sync -sleep 2 - -if ${VALGRIND} ./searchlib_fieldwriter_test_app "$@" -then - : -else - echo FAILURE: ./searchlib_fieldwriter_test_app program failed. - exit 1 -fi - -checksame() -{ - file1=$1 - rval=0 - shift - for file in $* - do - if cmp -s $file1 $file - then - : - else - echo "FAILURE: $file1 != $file" - rval=1 - fi - done - return $rval -} - -newpcntfiles1=index/new[46]*dictionary.pdat -newpcntfiles1b=index/new[46]*dictionary.spdat -newpcntfiles1c=index/new[46]*dictionary.ssdat -newpcntfiles2=index/newskip[46]*dictionary.pdat -newpcntfiles2b=index/newskip[46]*dictionary.pdat -newpcntfiles2c=index/newskip[46]*dictionary.pdat -newpcntfiles3=index/newchunk[46]*dictionary.pdat -newpcntfiles3b=index/newchunk[46]*dictionary.pdat -newpcntfiles3c=index/newchunk[46]*dictionary.pdat -newpcntfiles4=index/new[57]*dictionary.pdat -newpcntfiles4b=index/new[57]*dictionary.pdat -newpcntfiles4c=index/new[57]*dictionary.pdat -newpcntfiles5=index/newskip[57]*dictionary.pdat -newpcntfiles5b=index/newskip[57]*dictionary.pdat -newpcntfiles5c=index/newskip[57]*dictionary.pdat -newpcntfiles6=index/newchunk[57]*dictionary.pdat -newpcntfiles6b=index/newchunk[57]*dictionary.pdat -newpcntfiles6c=index/newchunk[57]*dictionary.pdat -newpfiles1=index/new[46]*posocc.dat.compressed -newpfiles2=index/newskip[46]*posocc.dat.compressed -newpfiles3=index/newchunk[46]*posocc.dat.compressed -newpfiles4=index/new[57]*posocc.dat.compressed -newpfiles5=index/newskip[57]*posocc.dat.compressed -newpfiles6=index/newchunk[57]*posocc.dat.compressed - -if checksame $newpcntfiles1 && checksame $newpcntfiles1b && checksame $newpcntfiles1c && checksame $newpfiles1 && checksame $newpcntfiles2 && checksame $newpcntfiles2b && checksame $newpcntfiles2c && checksame $newpfiles2 && checksame $newpcntfiles3 && checksame $newpcntfiles3b && checksame $newpcntfiles3c && checksame $newpfiles3 && checksame $newpcntfiles4 && checksame $newpcntfiles4b && checksame $newpcntfiles4c && checksame $newpfiles4 && checksame $newpcntfiles5 && checksame $newpcntfiles5b && checksame $newpcntfiles5c && checksame $newpfiles5 && checksame $newpcntfiles6 && checksame $newpcntfiles6b && checksame $newpcntfiles6c && checksame $newpfiles6 -then - echo SUCCESS: Files match up - exit 0 -else - echo FAILURE: Files do not match up - exit 1 -fi |