aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@broadpark.no>2019-05-09 12:52:57 +0200
committerTor Egge <Tor.Egge@broadpark.no>2019-05-09 12:52:57 +0200
commitb2cbefcb3eb68ad3985514e7451edade90ecad62 (patch)
tree80f4280b479dcdb63ad897a84cfd155ab9816ef5 /searchlib
parentc3667718a63a8703bf62833dcb92b7ad5422d0cc (diff)
Simplify field writer unit test.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt3
-rw-r--r--searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp197
-rwxr-xr-xsearchlib/src/tests/diskindex/fieldwriter/runtests.sh67
3 files changed, 108 insertions, 159 deletions
diff --git a/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt
index c2dcb83a423..b8666524c51 100644
--- a/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt
+++ b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt
@@ -6,5 +6,4 @@ vespa_add_executable(searchlib_fieldwriter_test_app TEST
searchlib_test
searchlib
)
-vespa_add_test(NAME searchlib_fieldwriter_test_app COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/runtests.sh
- DEPENDS searchlib_fieldwriter_test_app)
+vespa_add_test(NAME searchlib_fieldwriter_test_app COMMAND searchlib_fieldwriter_test_app)
diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
index 46ee5e15757..84a57e8aca7 100644
--- a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
+++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
@@ -19,6 +19,7 @@
#include <vespa/searchlib/diskindex/pagedict4randread.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/fastos/time.h>
+#include <openssl/sha.h>
#include <vespa/fastos/app.h>
#include <vespa/log/log.h>
LOG_SETUP("fieldwriter_test");
@@ -46,6 +47,7 @@ using search::index::SchemaUtil;
using search::index::schema::CollectionType;
using search::index::schema::DataType;
using search::queryeval::SearchIterator;
+using vespalib::alloc::Alloc;
using namespace search::index;
@@ -273,6 +275,76 @@ WrappedFieldReader::close()
}
+/**
+ * SHA-256 digest of the content of a single file. Two instances compare
+ * equal when their digests are byte-for-byte identical.
+ */
+class FileChecksum
+{
+    unsigned char _digest[SHA256_DIGEST_LENGTH];
+
+public:
+    // Explicit so that a file name is never silently converted into a
+    // checksum (which would read the whole file as a side effect).
+    explicit FileChecksum(const vespalib::string &file_name);
+    // Equal when all SHA256_DIGEST_LENGTH digest bytes match.
+    bool operator==(const FileChecksum &rhs) const {
+        return (memcmp(_digest, rhs._digest, SHA256_DIGEST_LENGTH) == 0);
+    }
+};
+
+
+/**
+ * Compute the SHA-256 digest of the file named dirprefix + file_name.
+ *
+ * The file is read in chunks of at most 65536 bytes. If the file cannot
+ * be opened, an error is logged and LOG_ABORT is invoked.
+ */
+FileChecksum::FileChecksum(const vespalib::string &file_name)
+{
+    SHA256_CTX ctx;
+    FastOS_File file;
+    Alloc chunk = Alloc::alloc(65536);
+    vespalib::string path(dirprefix + file_name);
+    if (!file.OpenReadOnly(path.c_str())) {
+        LOG(error, "Could not open %s for sha256 checksum", path.c_str());
+        LOG_ABORT("should not be reached");
+    }
+    const int64_t file_size = file.GetSize();
+    SHA256_Init(&ctx);
+    // Feed the file to the hash one chunk at a time until nothing is left.
+    for (int64_t left = file_size; left > 0; ) {
+        const int64_t len = std::min(left, static_cast<int64_t>(chunk.size()));
+        file.ReadBuf(chunk.get(), len);
+        SHA256_Update(&ctx, chunk.get(), len);
+        left -= len;
+    }
+    file.Close();
+    SHA256_Final(_digest, &ctx);
+}
+
+/**
+ * Verify that the baseline file (prefix + suffix) has the same checksum
+ * as the two fusion output files (prefix + "x" + suffix and
+ * prefix + "xx" + suffix).
+ *
+ * A mismatch is logged with the names of the differing files, followed
+ * by LOG_ABORT. The check is explicit rather than an assert() so that it
+ * is not compiled out when NDEBUG is defined.
+ */
+void
+compare_files(const vespalib::string &file_name_prefix, const vespalib::string &file_name_suffix)
+{
+    const vespalib::string baseline_name(file_name_prefix + file_name_suffix);
+    const vespalib::string cooked_fusion_name(file_name_prefix + "x" + file_name_suffix);
+    const vespalib::string raw_fusion_name(file_name_prefix + "xx" + file_name_suffix);
+    FileChecksum baseline_checksum(baseline_name);
+    FileChecksum cooked_fusion_checksum(cooked_fusion_name);
+    FileChecksum raw_fusion_checksum(raw_fusion_name);
+    if (!(baseline_checksum == cooked_fusion_checksum)) {
+        LOG(error, "Checksum mismatch between %s and %s",
+            baseline_name.c_str(), cooked_fusion_name.c_str());
+        LOG_ABORT("should not be reached");
+    }
+    if (!(baseline_checksum == raw_fusion_checksum)) {
+        LOG(error, "Checksum mismatch between %s and %s",
+            baseline_name.c_str(), raw_fusion_name.c_str());
+        LOG_ABORT("should not be reached");
+    }
+}
+
+// File name suffixes of the field files whose checksums are compared by
+// check_fusion(). Read-only, hence const.
+const std::vector<vespalib::string> suffixes = {
+    "boolocc.bdat", "boolocc.idx",
+    "posocc.dat.compressed",
+    "dictionary.pdat", "dictionary.spdat", "dictionary.ssdat"
+};
+
+/**
+ * Run compare_files() for the given file name prefix and every entry in
+ * 'suffixes'.
+ *
+ * The prefix is taken by const reference to avoid copying the string on
+ * each call.
+ */
+void
+check_fusion(const vespalib::string &file_name_prefix)
+{
+    for (const auto &file_name_suffix : suffixes) {
+        compare_files(file_name_prefix, file_name_suffix);
+    }
+}
+
+/**
+ * Call FieldWriter::remove() for dirprefix + file_name_prefix and for the
+ * same name with "x" and "xx" appended.
+ */
+void
+remove_field(const vespalib::string &file_name_prefix)
+{
+    const vespalib::string base(dirprefix + file_name_prefix);
+    // Same order as the files were produced: baseline, then "x", then "xx".
+    for (const char *variant : { "", "x", "xx" }) {
+        FieldWriter::remove(base + variant);
+    }
+}
+
void
writeField(FakeWordSet &wordSet,
uint32_t docIdLimit,
@@ -295,7 +367,7 @@ writeField(FakeWordSet &wordSet,
WrappedFieldWriter ostate(namepref,
dynamicK,
wordSet.getNumWords(), docIdLimit);
- FieldWriter::remove(namepref);
+ FieldWriter::remove(dirprefix + namepref);
ostate.open();
unsigned int wordNum = 1;
@@ -542,83 +614,39 @@ fusionField(uint32_t numWordIds,
+/*
+ * Run the full cycle for one field writer configuration:
+ * write the field, read it back sequentially and with random reads,
+ * run fusionField() twice (output prefixes file_name_prefix + "x" and
+ * file_name_prefix + "xx"), compare checksums of the baseline and fusion
+ * outputs, and finally remove the files again.
+ */
void
+testFieldWriterVariant(FakeWordSet &wordSet, uint32_t doc_id_limit,
+                       const vespalib::string &file_name_prefix,
+                       bool dynamic_k,
+                       bool verbose)
+{
+    writeField(wordSet, doc_id_limit, file_name_prefix, dynamic_k);
+    readField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, verbose);
+    randReadField(wordSet, file_name_prefix, dynamic_k, verbose);
+
+    // First fusion run: fifth argument false, output prefix "<prefix>x".
+    const vespalib::string first_fusion_prefix(file_name_prefix + "x");
+    fusionField(wordSet.getNumWords(), doc_id_limit,
+                file_name_prefix, first_fusion_prefix,
+                false, dynamic_k);
+
+    // Second fusion run: fifth argument true, output prefix "<prefix>xx".
+    const vespalib::string second_fusion_prefix(file_name_prefix + "xx");
+    fusionField(wordSet.getNumWords(), doc_id_limit,
+                file_name_prefix, second_fusion_prefix,
+                true, dynamic_k);
+
+    check_fusion(file_name_prefix);
+    remove_field(file_name_prefix);
+}
+
+void
testFieldWriterVariants(FakeWordSet &wordSet,
uint32_t docIdLimit, bool verbose)
{
disableSkip();
- writeField(wordSet, docIdLimit, "new4", true);
- readField(wordSet, docIdLimit, "new4", true, verbose);
- readField(wordSet, docIdLimit, "new4", true, verbose);
- writeField(wordSet, docIdLimit, "new5", false);
- readField(wordSet, docIdLimit, "new5", false, verbose);
- enableSkip();
- writeField(wordSet, docIdLimit, "newskip4", true);
- readField(wordSet, docIdLimit, "newskip4", true, verbose);
- writeField(wordSet, docIdLimit, "newskip5", false);
- readField(wordSet, docIdLimit, "newskip5", false, verbose);
- enableSkipChunks();
- writeField(wordSet, docIdLimit, "newchunk4", true);
- readField(wordSet, docIdLimit, "newchunk4", true, verbose);
- writeField(wordSet, docIdLimit, "newchunk5", false);
- readField(wordSet, docIdLimit,
- "newchunk5",false, verbose);
- disableSkip();
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "new4", "new4x",
- false, true);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "new4", "new4xx",
- true, true);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "new5", "new5x",
- false, false);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "new5", "new5xx",
- true, false);
- randReadField(wordSet, "new4", true, verbose);
- randReadField(wordSet, "new5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "new4", true, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "new5", false, verbose);
enableSkip();
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newskip4", "newskip4x",
- false, true);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newskip4", "newskip4xx",
- true, true);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newskip5", "newskip5x",
- false, false);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newskip5", "newskip5xx",
- true, false);
- randReadField(wordSet, "newskip4", true, verbose);
- randReadField(wordSet, "newskip5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newskip4", true, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newskip5", false, verbose);
enableSkipChunks();
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newchunk4", "newchunk4x",
- false, true);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newchunk4", "newchunk4xx",
- true, true);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newchunk5", "newchunk5x",
- false, false);
- fusionField(wordSet.getNumWords(),
- docIdLimit,
- "newchunk5", "newchunk5xx",
- true, false);
- randReadField(wordSet, "newchunk4", true, verbose);
- randReadField(wordSet, "newchunk5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newchunk4", true, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "newchunk5", false, verbose);
}
@@ -627,26 +655,14 @@ testFieldWriterVariantsWithHighLids(FakeWordSet &wordSet, uint32_t docIdLimit,
bool verbose)
{
disableSkip();
- writeField(wordSet, docIdLimit, "hlid4", true);
- readField(wordSet, docIdLimit, "hlid4", true, verbose);
- writeField(wordSet, docIdLimit, "hlid5", false);
- readField(wordSet, docIdLimit, "hlid5", false, verbose);
- randReadField(wordSet, "hlid4", true, verbose);
- randReadField(wordSet, "hlid5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlid4", true, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlid5", false, verbose);
enableSkip();
- writeField(wordSet, docIdLimit, "hlidskip4", true);
- readField(wordSet, docIdLimit, "hlidskip4", true, verbose);
- writeField(wordSet, docIdLimit, "hlidskip5", false);
- readField(wordSet, docIdLimit, "hlidskip5", false, verbose);
- randReadField(wordSet, "hlidskip4", true, verbose);
- randReadField(wordSet, "hlidskip5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidskip4", true, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidskip5", false, verbose);
enableSkipChunks();
- writeField(wordSet, docIdLimit, "hlidchunk4", true);
- readField(wordSet, docIdLimit, "hlidchunk4", true, verbose);
- writeField(wordSet, docIdLimit, "hlidchunk5", false);
- readField(wordSet, docIdLimit, "hlidchunk5", false, verbose);
- randReadField(wordSet, "hlidchunk4", true, verbose);
- randReadField(wordSet, "hlidchunk5", false, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk4", true, verbose);
+ testFieldWriterVariant(wordSet, docIdLimit, "hlidchunk5", false, verbose);
}
int
@@ -700,6 +716,7 @@ FieldWriterTest::Main()
_wordSet2.addDocIdBias(docIdBias); // Large skip numbers
testFieldWriterVariantsWithHighLids(_wordSet2, _numDocs + docIdBias,
_verbose);
+ vespalib::rmdir("index", true);
return 0;
}
diff --git a/searchlib/src/tests/diskindex/fieldwriter/runtests.sh b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh
deleted file mode 100755
index 0574819ca24..00000000000
--- a/searchlib/src/tests/diskindex/fieldwriter/runtests.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-set -e
-
-rm -f new* chkpt*
-sync
-sleep 2
-
-if ${VALGRIND} ./searchlib_fieldwriter_test_app "$@"
-then
- :
-else
- echo FAILURE: ./searchlib_fieldwriter_test_app program failed.
- exit 1
-fi
-
-checksame()
-{
- file1=$1
- rval=0
- shift
- for file in $*
- do
- if cmp -s $file1 $file
- then
- :
- else
- echo "FAILURE: $file1 != $file"
- rval=1
- fi
- done
- return $rval
-}
-
-newpcntfiles1=index/new[46]*dictionary.pdat
-newpcntfiles1b=index/new[46]*dictionary.spdat
-newpcntfiles1c=index/new[46]*dictionary.ssdat
-newpcntfiles2=index/newskip[46]*dictionary.pdat
-newpcntfiles2b=index/newskip[46]*dictionary.pdat
-newpcntfiles2c=index/newskip[46]*dictionary.pdat
-newpcntfiles3=index/newchunk[46]*dictionary.pdat
-newpcntfiles3b=index/newchunk[46]*dictionary.pdat
-newpcntfiles3c=index/newchunk[46]*dictionary.pdat
-newpcntfiles4=index/new[57]*dictionary.pdat
-newpcntfiles4b=index/new[57]*dictionary.pdat
-newpcntfiles4c=index/new[57]*dictionary.pdat
-newpcntfiles5=index/newskip[57]*dictionary.pdat
-newpcntfiles5b=index/newskip[57]*dictionary.pdat
-newpcntfiles5c=index/newskip[57]*dictionary.pdat
-newpcntfiles6=index/newchunk[57]*dictionary.pdat
-newpcntfiles6b=index/newchunk[57]*dictionary.pdat
-newpcntfiles6c=index/newchunk[57]*dictionary.pdat
-newpfiles1=index/new[46]*posocc.dat.compressed
-newpfiles2=index/newskip[46]*posocc.dat.compressed
-newpfiles3=index/newchunk[46]*posocc.dat.compressed
-newpfiles4=index/new[57]*posocc.dat.compressed
-newpfiles5=index/newskip[57]*posocc.dat.compressed
-newpfiles6=index/newchunk[57]*posocc.dat.compressed
-
-if checksame $newpcntfiles1 && checksame $newpcntfiles1b && checksame $newpcntfiles1c && checksame $newpfiles1 && checksame $newpcntfiles2 && checksame $newpcntfiles2b && checksame $newpcntfiles2c && checksame $newpfiles2 && checksame $newpcntfiles3 && checksame $newpcntfiles3b && checksame $newpcntfiles3c && checksame $newpfiles3 && checksame $newpcntfiles4 && checksame $newpcntfiles4b && checksame $newpcntfiles4c && checksame $newpfiles4 && checksame $newpcntfiles5 && checksame $newpcntfiles5b && checksame $newpcntfiles5c && checksame $newpfiles5 && checksame $newpcntfiles6 && checksame $newpcntfiles6b && checksame $newpcntfiles6c && checksame $newpfiles6
-then
- echo SUCCESS: Files match up
- exit 0
-else
- echo FAILURE: Files do not match up
- exit 1
-fi