From 5190dcda5079bc746cd05d6296d84435a66faa4a Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 29 Aug 2022 16:29:28 +0000 Subject: GC some old unused code with ancient 'goto'..... --- .../tests/proton/summaryengine/summaryengine.cpp | 3 - .../proton/docsummary/summarymanager.cpp | 3 +- searchlib/src/tests/util/rawbuf_test.cpp | 17 -- .../attribute/attributememorysavetarget.h | 3 +- searchlib/src/vespa/searchlib/parsequery/parse.h | 1 - .../searchlib/query/tree/stackdumpcreator.cpp | 1 + searchlib/src/vespa/searchlib/util/rawbuf.cpp | 43 +-- searchlib/src/vespa/searchlib/util/rawbuf.h | 19 +- searchsummary/CMakeLists.txt | 1 - .../slime_summary/slime_summary_test.cpp | 1 + searchsummary/src/tests/extractkeywords/.gitignore | 7 - .../src/tests/extractkeywords/CMakeLists.txt | 11 - .../tests/extractkeywords/extractkeywordstest.cpp | 295 --------------------- .../tests/extractkeywords/extractkeywordstest.h | 25 -- .../src/tests/extractkeywords/runtests.sh | 22 -- .../src/tests/extractkeywords/simplequerystack.cpp | 36 --- .../src/tests/extractkeywords/simplequerystack.h | 49 ---- .../tests/extractkeywords/simplequerystackitem.cpp | 198 -------------- .../tests/extractkeywords/simplequerystackitem.h | 107 -------- .../searchsummary/docsummary/docsumwriter.cpp | 1 + .../vespa/searchsummary/docsummary/docsumwriter.h | 9 +- .../searchsummary/docsummary/dynamicteaserdfw.cpp | 2 +- .../searchsummary/docsummary/keywordextractor.cpp | 95 ------- .../searchsummary/docsummary/keywordextractor.h | 60 +---- .../vespa/searchsummary/docsummary/resultclass.h | 1 - .../src/vespa/searchvisitor/searchvisitor.h | 1 + .../src/vespa/vsm/vsm/vsm-adapter.cpp | 1 + 27 files changed, 31 insertions(+), 981 deletions(-) delete mode 100644 searchsummary/src/tests/extractkeywords/.gitignore delete mode 100644 searchsummary/src/tests/extractkeywords/CMakeLists.txt delete mode 100644 searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp delete mode 100644 searchsummary/src/tests/extractkeywords/extractkeywordstest.h delete mode 100755 searchsummary/src/tests/extractkeywords/runtests.sh delete mode 100644 searchsummary/src/tests/extractkeywords/simplequerystack.cpp delete mode 100644 searchsummary/src/tests/extractkeywords/simplequerystack.h delete mode 100644 searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp delete mode 100644 searchsummary/src/tests/extractkeywords/simplequerystackitem.h diff --git a/searchcore/src/tests/proton/summaryengine/summaryengine.cpp b/searchcore/src/tests/proton/summaryengine/summaryengine.cpp index f82b4c9243f..91827326074 100644 --- a/searchcore/src/tests/proton/summaryengine/summaryengine.cpp +++ b/searchcore/src/tests/proton/summaryengine/summaryengine.cpp @@ -4,12 +4,9 @@ #include #include #include -#include #include #include #include -#include -#include #include #include diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp index 18c84038f9b..f398898ebb8 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -89,7 +90,7 @@ SummarySetup(const vespalib::string & baseDir, const SummaryConfig & summaryCfg, _juniperConfig(), _attributeMgr(std::move(attributeMgr)), _docStore(std::move(docStore)), - _repo(repo) + _repo(std::move(repo)) { auto resultConfig = std::make_unique(); if (!resultConfig->ReadConfig(summaryCfg, make_string("SummaryManager(%s)", baseDir.c_str()).c_str())) { diff --git a/searchlib/src/tests/util/rawbuf_test.cpp b/searchlib/src/tests/util/rawbuf_test.cpp index fd77b5b4ddb..ca7221d0240 100644 --- a/searchlib/src/tests/util/rawbuf_test.cpp +++ b/searchlib/src/tests/util/rawbuf_test.cpp @@ -116,23 +116,6 @@ TEST("require that prealloc makes enough room") { EXPECT_LESS_EQUAL(100u, buf.GetFreeLen()); } -TEST("require that rawbuf can read from file") { - FastOS_File file("mytemporaryfile"); - ASSERT_TRUE(file.OpenReadWrite()); - ASSERT_EQUAL(6, file.Write2("barbaz", 6)); - file.SetPosition(0); - - RawBuf buf(10); - buf += "foo"; - buf.readFile(file, 3); - EXPECT_EQUAL("foobar", getString(buf)); - buf.readFile(file, 100); - EXPECT_EQUAL("foobarbaz", getString(buf)); - - ASSERT_TRUE(file.Close()); - file.Delete(); -} - TEST("require that compact discards drained data") { RawBuf buf(10); buf += "foobar"; diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h index b32c8fc1663..58aba882e8d 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h +++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h @@ -5,7 +5,6 @@ #include "attributememoryfilewriter.h" #include "iattributesavetarget.h" #include -#include #include #include #include @@ -37,7 +36,7 @@ private: public: AttributeMemorySaveTarget(); - ~AttributeMemorySaveTarget(); + ~AttributeMemorySaveTarget() override; /** * Write the underlying buffer(s) to file(s). diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h index c3b5fcc81fa..1285125d34b 100644 --- a/searchlib/src/vespa/searchlib/parsequery/parse.h +++ b/searchlib/src/vespa/searchlib/parsequery/parse.h @@ -3,7 +3,6 @@ #pragma once #include -#include #include namespace search { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp index 5ed33ec18a1..9bb62e76c49 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp @@ -8,6 +8,7 @@ #include #include #include +#include using vespalib::string; using std::vector; diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.cpp b/searchlib/src/vespa/searchlib/util/rawbuf.cpp index d3cc9996c34..d64d85f2f1d 100644 --- a/searchlib/src/vespa/searchlib/util/rawbuf.cpp +++ b/searchlib/src/vespa/searchlib/util/rawbuf.cpp @@ -2,14 +2,12 @@ #include "rawbuf.h" #include -#include #include #include +#include namespace search { -static inline size_t smin(size_t a, size_t b) { return (a < b) ? a : b; } - RawBuf::RawBuf(size_t size) : _bufStart(nullptr), _bufEnd(nullptr), @@ -25,21 +23,6 @@ RawBuf::RawBuf(size_t size) _bufDrainPos = _bufFillPos = _bufStart; } - -RawBuf::RawBuf(char *start, size_t size) - : _bufStart(nullptr), - _bufEnd(nullptr), - _bufFillPos(nullptr), - _bufDrainPos(nullptr), - _initialBufStart(start), - _initialSize(size) -{ - _bufStart = start; - _bufEnd = _bufStart + size; - _bufDrainPos = _bufFillPos = _bufStart; -} - - RawBuf::~RawBuf() { if (_bufStart != _initialBufStart) @@ -109,17 +92,6 @@ RawBuf::appendCompressedNumber(int64_t n) _bufFillPos += vespalib::compress::Integer::compress(n, _bufFillPos); } - -/** - * Has the entire contents of the buffer been used up, i.e. freed? - */ -bool -RawBuf::IsEmpty() -{ - return _bufFillPos == _bufDrainPos; -} - - /** * Free 'len' bytes from the start of the contents. (These * have presumably been written or read.) @@ -345,19 +317,6 @@ RawBuf::addSignedHitRank(SignedHitRank num) append(buf1, strlen(buf1)); } -/** - * Read from the indicated file into the buffer, no more that the - * given number of bytes and no more than will fit in the buffer. - */ -size_t -RawBuf::readFile(FastOS_FileInterface &file, size_t maxlen) -{ - size_t got = file.Read(_bufFillPos, smin((_bufEnd - _bufFillPos), maxlen)); - if (got > 0) - _bufFillPos += got; - return got; -} - void RawBuf::ensureSizeInternal(size_t size) { expandBuf(size); diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.h b/searchlib/src/vespa/searchlib/util/rawbuf.h index e69c13e13b3..258e3da88ed 100644 --- a/searchlib/src/vespa/searchlib/util/rawbuf.h +++ b/searchlib/src/vespa/searchlib/util/rawbuf.h @@ -19,9 +19,6 @@ namespace search { class RawBuf { private: - RawBuf(const RawBuf &); - RawBuf& operator=(const RawBuf &); - char* _bufStart; // ref. to start of buffer (don't move this!) char* _bufEnd; // ref. to byte after last in buffer (don't mo) char* _bufFillPos; // ref. to byte where next should be put in @@ -31,9 +28,9 @@ private: void ensureSizeInternal(size_t size); public: - - RawBuf(char *start, size_t size);// Initially use provided buffer - RawBuf(size_t size); // malloc-s given size, assigns to _bufStart + RawBuf(const RawBuf &) = delete; + RawBuf& operator=(const RawBuf &) = delete; + explicit RawBuf(size_t size); // malloc-s given size, assigns to _bufStart ~RawBuf(); // Frees _bufStart, i.e. the char[]. void operator+=(const char *src); @@ -48,10 +45,8 @@ public: void append(const void *data, size_t len); void append(uint8_t byte); - void appendLong(uint64_t n); void appendCompressedPositiveNumber(uint64_t n); void appendCompressedNumber(int64_t n); - bool IsEmpty(); // Return whether all written. void expandBuf(size_t needlen); size_t GetFreeLen() const { return _bufEnd - _bufFillPos; } size_t GetDrainLen() const { return _bufDrainPos - _bufStart; } @@ -59,7 +54,6 @@ public: const char *GetFillPos() const { return _bufFillPos; } char * GetWritableFillPos() const { return _bufFillPos; } char * GetWritableFillPos(size_t len) { preAlloc(len); return _bufFillPos; } - char * GetWritableDrainPos(size_t offset) { return _bufDrainPos + offset; } void truncate(size_t offset) { _bufFillPos = _bufDrainPos + offset; } void preAlloc(size_t len); // Ensure room for 'len' more bytes. size_t readFile(FastOS_FileInterface &file, size_t maxlen); @@ -83,16 +77,11 @@ public: static uint16_t InetTo16(const unsigned char *src) { return (static_cast(*src) << 8) + *(src + 1); }; - static uint16_t InetTo16(const char* src) { - return InetTo16(reinterpret_cast(src)); - }; + static uint32_t InetTo32(const unsigned char* src) { return (((((static_cast(*src) << 8) + *(src + 1)) << 8) + *(src + 2)) << 8) + *(src + 3); }; - static uint32_t InetTo32(const char* src) { - return InetTo32(reinterpret_cast(src)); - }; /** * Convert unsigned int.s 'src', to interNet highendian order, at 'dst' diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt index 60f85c07b86..d36bfdc0bae 100644 --- a/searchsummary/CMakeLists.txt +++ b/searchsummary/CMakeLists.txt @@ -22,6 +22,5 @@ vespa_define_module( src/tests/docsummary/matched_elements_filter src/tests/docsummary/slime_summary src/tests/docsummary/summary_field_converter - src/tests/extractkeywords src/tests/juniper ) diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp index 445b08570a6..5e4d375c49f 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/searchsummary/src/tests/extractkeywords/.gitignore b/searchsummary/src/tests/extractkeywords/.gitignore deleted file mode 100644 index 1b50b24b284..00000000000 --- a/searchsummary/src/tests/extractkeywords/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -*.core -.depend -Makefile -core -core.* -extractkeywordstest -searchsummary_extractkeywordstest_app diff --git a/searchsummary/src/tests/extractkeywords/CMakeLists.txt b/searchsummary/src/tests/extractkeywords/CMakeLists.txt deleted file mode 100644 index 802bff92544..00000000000 --- a/searchsummary/src/tests/extractkeywords/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchsummary_extractkeywordstest_app TEST - SOURCES - extractkeywordstest.cpp - simplequerystack.cpp - simplequerystackitem.cpp - DEPENDS - searchsummary -) -vespa_add_test(NAME searchsummary_extractkeywordstest_app COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/runtests.sh - DEPENDS searchsummary_extractkeywordstest_app) diff --git a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp deleted file mode 100644 index 724cf338497..00000000000 --- a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "extractkeywordstest.h" -#include -#include -#include "simplequerystack.h" -#include -#include - -#define NUMTESTS 5 - -int -ExtractKeywordsTest::main(int argc, char **argv) -{ - int doTest[NUMTESTS]; - int low, high, accnum, num; - int indicator; - bool verify = false; - int multiplier = 1; - bool failed = false; - - if (argc == 1) - return Usage(argv[0]); - - // default initialize to not run any tests. - for (int n = 0; n < NUMTESTS; n++) - doTest[n] = 0; - - // parse the command line arguments - for (int i = 1; i < argc; i++) { - low = 0; - high = NUMTESTS - 1; - char *p = argv[i]; - - // Check if a multiplier is specified - if (*p == '*') { - p++; - accnum = 0; - while (*p != '\0') { - num = *p - '0'; - accnum = accnum * 10 + num; - p++; - } - multiplier = accnum; - continue; - } - - // Default is to run the tests specified, unless the first char is '/' - indicator = 1; - if (*p == '/') { - p++; - indicator = 0; - } - - // Find the first number - accnum = 0; - while (*p != '-' && *p != '\0') { - num = *p - '0'; - accnum = accnum * 10 + num; - p++; - } - if (accnum >= NUMTESTS) - continue; - low = accnum; - // Check for range operator - if (*p == '-') { - p++; - // Find the second number - accnum = 0; - while (*p != '\0') { - num = *p - '0'; - accnum = accnum * 10 + num; - p++; - } - if (accnum > 0) - high = accnum < NUMTESTS ? accnum : NUMTESTS-1; - } else - high = low; - - // Indicate the runrequest for the desired range. - for (int j = low; j <= high; j++) - doTest[j] = indicator; - } - - // Remove unused tests. - // doTest[1] = 0; - - // Remember time - if (multiplier > 1) { - printf("Running all tests %d times.\n", multiplier); - verify = false; - } else { - verify = true; - } - - int testCnt = 0; - - // init keyword extractor - _extractor = new search::docsummary::KeywordExtractor(nullptr); - _extractor->AddLegalIndexSpec("*"); - - vespalib::Timer timer; - - // Actually run the tests that we wanted. - for (int j = 0; j < multiplier; j++) - for (int k = 0; k < NUMTESTS; k++) - if (doTest[k] == 1) { - if (!RunTest(k, verify)) - failed = true; - testCnt++; - } - - // Print time taken - double timeTaken = vespalib::to_s(timer.elapsed())*1000.0; - - printf("Time taken : %f ms\n", timeTaken); - printf("Number of tests run: %d\n", testCnt); - double avgTestPrMSec = static_cast(testCnt) / timeTaken; - printf("Tests pr Sec: %f\n", avgTestPrMSec * 1000.0); - - delete _extractor; - _extractor = nullptr; - - return failed ? 1 : 0; -} - -bool -ExtractKeywordsTest::ShowResult(int testNo, - const char *actual, const char *correct) -{ - const char *act_word = actual; - const char *cor_word = correct; - printf("%03d: ", testNo); - - while (*act_word != '\0') { - if (strcmp(act_word, cor_word) != 0) { - printf("fail. Keywords differ for act: %s, corr: %s\n", - act_word, cor_word); - return false; - } else { - act_word += strlen(act_word) + 1; - cor_word += strlen(cor_word) + 1; - } - } - if (*cor_word != '\0') { - printf("fail. actual list shorter than correct at %s\n", cor_word); - return false; - } - printf("ok\n"); - return true; -} - -/** - * - * @param testno The test to run. - * @param verify Verify the result of the test. - */ -bool -ExtractKeywordsTest::RunTest(int testno, bool verify) -{ - search::SimpleQueryStack stack; - search::RawBuf buf(32_Ki); - const char *correct = nullptr; - const char *keywords = nullptr; - - switch (testno) { - case 0: - { - // Simple term query - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); - - stack.AppendBuffer(&buf); - keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); - correct = "foobar\0\0"; - - if (verify) ShowResult(testno, keywords, correct); - free(const_cast(keywords)); - break; - } - - case 1: - { - // check that skipping these works also: - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_GEO_LOCATION_TERM, "no")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_NEAREST_NEIGHBOR, "no")); - // multi term query - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_OR, 3)); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 3)); - - stack.AppendBuffer(&buf); - keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); - correct = "bar\0foo\0foobar\0\0"; - - if (verify) ShowResult(testno, keywords, correct); - free(const_cast(keywords)); - break; - } - - case 2: - { - // phrase term query - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 3, "index")); - - stack.AppendBuffer(&buf); - keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); - correct = "bar foo foobar\0\0"; - - if (verify) ShowResult(testno, keywords, correct); - free(const_cast(keywords)); - break; - } - - case 3: - { - // multiple phrase and term query - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "xyzzy")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "xyz")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 2, "index")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 3, "index")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "baz")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "zog")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 3)); - - stack.AppendBuffer(&buf); - keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); - correct = "zog\0baz\0bar foo foobar\0xyz xyzzy\0\0"; - - if (verify) ShowResult(testno, keywords, correct); - free(const_cast(keywords)); - break; - } - - case 4: - { - // phrase term query with wrong argument items - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 2)); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 2, "index")); - - stack.AppendBuffer(&buf); - keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); - correct = "\0"; - - if (verify) ShowResult(testno, keywords, correct); - free(const_cast(keywords)); - break; - } - - default: - { - printf("%03d: no such test\n", testno); - return false; - } - } - - bool result = true; - /* - if (verify) { - result = ShowResult(testno, pq->GetStack(), correct); - delete correct; - } else { - result = true; - } - delete pq; - */ - return result; -} - -int -ExtractKeywordsTest::Usage(char *progname) -{ - printf("%s {testnospec}+\n\ - Where testnospec is:\n\ - num: single test\n\ - num-num: inclusive range (open range permitted)\n",progname); - printf("There are tests from %d to %d\n\n", 0, NUMTESTS-1); - return EXIT_FAILURE; -} - -int main(int argc, char** argv) { - vespalib::SignalHandler::PIPE.ignore(); - ExtractKeywordsTest tester; - return tester.main(argc, argv); -} diff --git a/searchsummary/src/tests/extractkeywords/extractkeywordstest.h b/searchsummary/src/tests/extractkeywords/extractkeywordstest.h deleted file mode 100644 index 6bd07d8a111..00000000000 --- a/searchsummary/src/tests/extractkeywords/extractkeywordstest.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -namespace search::docsummary { class KeywordExtractor; } - -class ExtractKeywordsTest -{ -private: - ExtractKeywordsTest(const ExtractKeywordsTest &); - ExtractKeywordsTest& operator=(const ExtractKeywordsTest &); - - search::docsummary::KeywordExtractor *_extractor; - - int Usage(char *progname); - bool ShowResult(int testNo, const char *actual, const char *correct); - bool RunTest(int i, bool verify); - -public: - ExtractKeywordsTest() - : _extractor(nullptr) - {} - int main(int argc, char **argv); -}; - diff --git a/searchsummary/src/tests/extractkeywords/runtests.sh b/searchsummary/src/tests/extractkeywords/runtests.sh deleted file mode 100755 index 611b47dd888..00000000000 --- a/searchsummary/src/tests/extractkeywords/runtests.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -set -e - -if $VALGRIND ./searchsummary_extractkeywordstest_app - -then - : -else - echo FAILED: searchsummary_extractkeywordstest_app test failed - exit 1 -fi - -if $VALGRIND ./searchsummary_extractkeywordstest_app - '*1000' -then - : -else - echo FAILED: searchsummary_extractkeywordstest_app test failed - exit 1 -fi - -echo SUCCESS: searchsummary_extractkeywordstest_app test completed diff --git a/searchsummary/src/tests/extractkeywords/simplequerystack.cpp b/searchsummary/src/tests/extractkeywords/simplequerystack.cpp deleted file mode 100644 index c96ef8a8455..00000000000 --- a/searchsummary/src/tests/extractkeywords/simplequerystack.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "simplequerystack.h" -#include - -#include -LOG_SETUP(".search.simplequerystack"); - -namespace search { - -SimpleQueryStack::SimpleQueryStack() - : _stack(nullptr) -{ -} - -SimpleQueryStack::~SimpleQueryStack() -{ - delete _stack; -} - -void -SimpleQueryStack::Push(SimpleQueryStackItem *item) -{ - item->_next = _stack; - _stack = item; -} - -void -SimpleQueryStack::AppendBuffer(RawBuf *buf) const -{ - for (SimpleQueryStackItem *item = _stack; item != nullptr; item = item->_next) { - item->AppendBuffer(buf); - } -} - -} // namespace search diff --git a/searchsummary/src/tests/extractkeywords/simplequerystack.h b/searchsummary/src/tests/extractkeywords/simplequerystack.h deleted file mode 100644 index 0b61a41944b..00000000000 --- a/searchsummary/src/tests/extractkeywords/simplequerystack.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "simplequerystackitem.h" -#include -#include - -namespace search { - -/** - * A stack of SimpleQueryStackItems. - * - * A simple stack consisting of a list of SimpleQueryStackItems. - * It is able to generate a binary encoding of itself - * to a RawBuf. - */ -class SimpleQueryStack -{ -private: - /** The top of the stack. */ - SimpleQueryStackItem *_stack; - -public: - SimpleQueryStack(const SimpleQueryStack &) = delete; - SimpleQueryStack& operator=(const SimpleQueryStack &) = delete; - /** - * Constructor for SimpleQueryStack. - */ - SimpleQueryStack(); - /** - * Destructor for SimpleQueryStack. - */ - ~SimpleQueryStack(); - /** - * Push an item on the stack. - * @param item The SimpleQueryStackItem to push. - */ - void Push(SimpleQueryStackItem *item); - - /** - * Encode the contents of the stack in a binary buffer. - * @param buf Pointer to a buffer containing the encoded contents. - */ - void AppendBuffer(RawBuf *buf) const; -}; - -} // namespace search - diff --git a/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp b/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp deleted file mode 100644 index 65815f86251..00000000000 --- a/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "simplequerystackitem.h" -#include -#include -#include - -namespace search { - -SimpleQueryStackItem::SimpleQueryStackItem() - : _next(NULL), - _arg1(0), - _arg2(0), - _arg3(0), - _type(ITEM_UNDEF), - _arity(0), - _indexName(), - _term() -{} - -namespace { - -void assert_term_type(ParseItem::ItemType type) { - assert(type == ParseItem::ITEM_TERM || - type == ParseItem::ITEM_NUMTERM || - type == ParseItem::ITEM_NEAREST_NEIGHBOR || - type == ParseItem::ITEM_GEO_LOCATION_TERM || - type == ParseItem::ITEM_PREFIXTERM || - type == ParseItem::ITEM_SUBSTRINGTERM || - type == ParseItem::ITEM_SUFFIXTERM || - type == ParseItem::ITEM_PURE_WEIGHTED_STRING || - type == ParseItem::ITEM_PURE_WEIGHTED_LONG || - type == ParseItem::ITEM_EXACTSTRINGTERM || - type == ParseItem::ITEM_PREDICATE_QUERY); - (void) type; -} - -void assert_arity_type(ParseItem::ItemType type) { - // types with arity, but without an index name: - assert(type == ParseItem::ITEM_OR || - type == ParseItem::ITEM_WEAK_AND || - type == ParseItem::ITEM_EQUIV || - type == ParseItem::ITEM_AND || - type == ParseItem::ITEM_NOT || - type == ParseItem::ITEM_RANK || - type == ParseItem::ITEM_ANY || - type == ParseItem::ITEM_NEAR || - type == ParseItem::ITEM_ONEAR); - (void) type; -} - -void assert_arity_and_index_type(ParseItem::ItemType type) { - // types with arity and an index name: - assert(type == ParseItem::ITEM_PHRASE || - type == ParseItem::ITEM_SAME_ELEMENT || - type == ParseItem::ITEM_WEIGHTED_SET || - type == ParseItem::ITEM_DOT_PRODUCT || - type == ParseItem::ITEM_WAND || - type == ParseItem::ITEM_WORD_ALTERNATIVES); - (void) type; -} - -int64_t term_as_n64(vespalib::stringref term) { - int64_t tmp; - vespalib::asciistream generatedTerm(term); - generatedTerm >> tmp; - return vespalib::nbo::n2h(tmp); -} - -} // namespace - - -SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, int arity) : SimpleQueryStackItem() -{ - assert_arity_type(type); - SetType(type); - _arity = arity; -} - -SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, int arity, const char *idx) : SimpleQueryStackItem() -{ - assert_arity_and_index_type(type); - SetType(type); - _arity = arity; - SetIndex(idx); -} - -SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, const char *term) : SimpleQueryStackItem() -{ - assert_term_type(type); - SetType(type); - SetTerm(term); -} - -SimpleQueryStackItem::~SimpleQueryStackItem() -{ - delete _next; -} - -void -SimpleQueryStackItem::AppendBuffer(RawBuf *buf) const -{ - // Calculate lengths - uint32_t indexLen = _indexName.size(); - uint32_t termLen = _term.size(); - double nboVal = 0.0; - - // Put the values into the buffer. - buf->append(_type); - switch (Type()) { - case ITEM_OR: - case ITEM_EQUIV: - case ITEM_AND: - case ITEM_NOT: - case ITEM_RANK: - case ITEM_ANY: - buf->appendCompressedPositiveNumber(_arity); - break; - case ITEM_NEAR: - case ITEM_ONEAR: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(_arg1); - break; - case ITEM_SAME_ELEMENT: - case ITEM_WEIGHTED_SET: - case ITEM_DOT_PRODUCT: - case ITEM_PHRASE: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(indexLen); - buf->append(_indexName.c_str(), indexLen); - break; - case ITEM_WORD_ALTERNATIVES: - buf->appendCompressedPositiveNumber(indexLen); - buf->append(_indexName.c_str(), indexLen); - buf->appendCompressedPositiveNumber(_arity); - break; - case ITEM_WEAK_AND: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(_arg1); - buf->appendCompressedPositiveNumber(indexLen); - buf->append(_indexName.c_str(), indexLen); - break; - case ITEM_WAND: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(indexLen); - buf->append(_indexName.c_str(), indexLen); - buf->appendCompressedPositiveNumber(_arg1); // targetNumHits - nboVal = vespalib::nbo::n2h(_arg2); - buf->append(&nboVal, sizeof(nboVal)); // scoreThreshold - nboVal = vespalib::nbo::n2h(_arg3); - buf->append(&nboVal, sizeof(nboVal)); // thresholdBoostFactor - break; - case ITEM_TERM: - case ITEM_NUMTERM: - case ITEM_GEO_LOCATION_TERM: - case ITEM_PREFIXTERM: - case ITEM_SUBSTRINGTERM: - case ITEM_EXACTSTRINGTERM: - case ITEM_SUFFIXTERM: - case ITEM_REGEXP: - case ITEM_FUZZY: - buf->appendCompressedPositiveNumber(indexLen); - buf->append(_indexName.c_str(), indexLen); - buf->appendCompressedPositiveNumber(termLen); - buf->append(_term.c_str(), termLen); - break; - case ITEM_TRUE: - case ITEM_FALSE: - // no content - break; - case ITEM_PURE_WEIGHTED_STRING: - buf->appendCompressedPositiveNumber(termLen); - buf->append(_term.c_str(), termLen); - break; - case ITEM_PURE_WEIGHTED_LONG: - { - int64_t tmp = term_as_n64(_term); - buf->append(&tmp, sizeof(int64_t)); - } - break; - case ITEM_NEAREST_NEIGHBOR: - buf->appendCompressedPositiveNumber(indexLen); - buf->append(_indexName.c_str(), indexLen); - buf->appendCompressedPositiveNumber(termLen); - buf->append(_term.c_str(), termLen); - buf->appendCompressedPositiveNumber(_arg1); // targetNumHits - buf->appendCompressedPositiveNumber(_arg2); // allow_approximate - buf->appendCompressedPositiveNumber(_arg3); // explore_additional_hits - break; - case ITEM_MULTI_TERM: // TODO: handle - case ITEM_PREDICATE_QUERY: // not handled at all here - case ITEM_UNDEF: - abort(); - break; - } -} - -} diff --git a/searchsummary/src/tests/extractkeywords/simplequerystackitem.h b/searchsummary/src/tests/extractkeywords/simplequerystackitem.h deleted file mode 100644 index 58864e18444..00000000000 --- a/searchsummary/src/tests/extractkeywords/simplequerystackitem.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include -#include -#include -#include - -namespace search { - -/** - * An item on the simple query stack. - * - * An object of this class represents a single item - * on the simple query stack. It has a type, which corresponds - * to the different query stack execution operations. It also - * provides an arity, and the string values indexName and term, to - * accomodate the different needs of the operations. - */ -class SimpleQueryStackItem : public ParseItem -{ -private: - SimpleQueryStackItem(const SimpleQueryStackItem &) = delete; - SimpleQueryStackItem& operator=(const SimpleQueryStackItem &) = delete; - SimpleQueryStackItem(); -public: - /** Pointer to next item in a linked list. */ - SimpleQueryStackItem *_next; - -private: - uint32_t _arg1; - double _arg2; - double _arg3; - ItemType _type; - -public: - ItemType Type() const { return _type; } - - /** The number of operands for the operation. */ - uint32_t _arity; - /** The name of the specified index, or empty if no index. */ - vespalib::string _indexName; - /** The specified search term. */ - vespalib::string _term; - -/** - * Overloaded constructor for SimpleQueryStackItem. Used primarily for - * the operators, or phrase without indexName. - * - * @param type The type of the SimpleQueryStackItem. - * @param arity The arity of the operation indicated by the SimpleQueryStackItem. - */ - SimpleQueryStackItem(ItemType type, int arity); - -/** - * Overloaded constructor for SimpleQueryStackItem. Used for PHRASEs. - * - * @param type The type of the SimpleQueryStackItem. - * @param arity The arity of the operation indicated by the SimpleQueryStackItem. - * @param idx The name of the index of the SimpleQueryStackItem. - */ - SimpleQueryStackItem(ItemType type, int arity, const char *index); - -/** - * Overloaded constructor for SimpleQueryStackItem. Used for TERMs without index. - * - * @param type The type of the SimpleQueryStackItem. - * @param term The actual term string of the SimpleQueryStackItem. - */ - SimpleQueryStackItem(ItemType type, const char *term); - -/** - * Destructor for SimpleQueryStackItem. - */ - ~SimpleQueryStackItem(); - -/** - * Set the value of the _term field. - * @param term The string to set the _term field to. - */ - void SetTerm(const char *term) { _term = term; } - -/** - * Set the value of the _indexName field. - * @param idx The string to set the _indexName field to. - */ - void SetIndex(const char *index) { _indexName = index; } - - /** - * Set the type of the operator. Use this with caution, - * as this changes the semantics of the item. - * - * @param type The new type. - */ - void SetType(ItemType type) { - _type = type; - } - - /** - * Encode the item in a binary buffer. - * @param buf Pointer to a buffer containing the encoded contents. - */ - void AppendBuffer(RawBuf *buf) const; -}; - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index 27e8ecea4ca..507734ac2e4 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -4,6 +4,7 @@ #include "docsumstate.h" #include "docsum_field_writer_state.h" #include "i_docsum_store_document.h" +#include "keywordextractor.h" #include #include #include diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h index bea6a747f84..e54463ea202 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h @@ -6,18 +6,21 @@ #include "resultclass.h" #include "resultconfig.h" #include "docsumstore.h" -#include "keywordextractor.h" #include "docsum_field_writer.h" -#include #include #include -namespace search { class IAttributeManager; } +namespace search { + class IAttributeManager; + class RawBuf; +} namespace vespalib { class Slime; } namespace search::docsummary { +class KeywordExtractor; + static constexpr uint32_t SLIME_MAGIC_ID = 0x55555555; class IDocsumWriter diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index f7762408904..24558dd97e8 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -4,7 +4,7 @@ #include "docsumwriter.h" #include "docsumstate.h" #include "i_docsum_store_document.h" -#include +#include "keywordextractor.h" #include #include #include diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp index e97017e79c4..8b544eb09f6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp @@ -131,99 +131,4 @@ KeywordExtractor::IsLegalIndex(vespalib::stringref idxS) const IsLegalIndexName(resolvedIdxName.c_str())); } - -char * -KeywordExtractor::ExtractKeywords(vespalib::stringref buf) const -{ - search::SimpleQueryStackDumpIterator si(buf); - char keywordstore[4_Ki]; // Initial storage for keywords buffer - search::RawBuf keywords(keywordstore, sizeof(keywordstore)); - - while (si.next()) { - search::ParseItem::ItemCreator creator = si.getCreator(); - switch (si.getType()) { - case search::ParseItem::ITEM_NOT: - /** - * @todo Must consider only the first argument on the stack. - * Difficult without recursion. - */ - break; - - case search::ParseItem::ITEM_PHRASE: - { - // Must take the next arity TERMS and put together - bool phraseterms_was_added = false; - int phraseterms = si.getArity(); - for (int i = 0; i < phraseterms; i++) { - si.next(); - search::ParseItem::ItemType newtype = si.getType(); - if (newtype != search::ParseItem::ITEM_TERM && - newtype != search::ParseItem::ITEM_NUMTERM) - { - // stack syntax error - // LOG(debug, "Extracting keywords found a non-term in a phrase"); - // making a clean escape. - keywords.reset(); - goto iteratorloopend; - } else { - if (!IsLegalIndex(si.getIndexName())) - continue; - // Found a term - vespalib::stringref term = si.getTerm(); - search::ParseItem::ItemCreator term_creator = si.getCreator(); - if ( !term.empty() && useful(term_creator)) { - // Actual term to add - if (phraseterms_was_added) { - // Not the first term in the phrase - keywords += " "; - } else { - phraseterms_was_added = true; - } - - keywords.append(term.data(), term.size()); - } - } - } - if (phraseterms_was_added) { - // Terms was added, so 0-terminate the string - keywords.append("\0", 1); - } - - break; - } - case search::ParseItem::ITEM_PREFIXTERM: - case search::ParseItem::ITEM_SUBSTRINGTERM: - case search::ParseItem::ITEM_EXACTSTRINGTERM: - case search::ParseItem::ITEM_NUMTERM: - case search::ParseItem::ITEM_TERM: - if (!IsLegalIndex(si.getIndexName())) - continue; - { - // add a new keyword - vespalib::stringref term = si.getTerm(); - if ( !term.empty() && useful(creator)) { - // An actual string to add - keywords.append(term.data(), term.size()); - keywords.append("\0", 1); - } - } - break; - - default: - // Do nothing to AND, RANK, OR - break; - } - } - iteratorloopend: - // Add a 'blank' keyword - keywords.append("\0", 1); - - // Must now allocate a string and copy the data from the rawbuf - void *result = malloc(keywords.GetUsedLen()); - if (result != nullptr) { - memcpy(result, keywords.GetDrainPos(), keywords.GetUsedLen()); - } - return static_cast(result); -} - } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h index 11e8da1ac7b..9d6905d66d4 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h @@ -3,7 +3,6 @@ #pragma once #include -#include namespace search::docsummary { @@ -25,12 +24,11 @@ public: private: typedef vespalib::hash_set Set; - IDocsumEnvironment *_env; - std::vector _legalPrefixes; - Set _legalIndexes; + IDocsumEnvironment *_env; + std::vector _legalPrefixes; + Set _legalIndexes; - bool IsLegalIndexPrefix(const char *idxName) const - { + bool IsLegalIndexPrefix(const char *idxName) const { for (auto& prefix : _legalPrefixes ) { if (prefix.Match(idxName)) { return true; @@ -39,6 +37,13 @@ private: return false; } + void AddLegalIndexPrefix(const char *prefix) { + _legalPrefixes.emplace_back(prefix); + } + + void AddLegalIndexName(const char *idxName) { + _legalIndexes.insert(idxName); + } bool IsLegalIndexName(const char *idxName) const; public: explicit KeywordExtractor(IDocsumEnvironment * env); @@ -47,28 +52,6 @@ public: ~KeywordExtractor(); - /** - * Add a prefix to the set of legal index name prefixes. - * - * @param prefix the index name prefix to add. - **/ - void AddLegalIndexPrefix(const char *prefix) - { - _legalPrefixes.emplace_back(prefix); - } - - - /** - * Add a name to the set of legal index names. - * - * @param idxName the index name to add. - **/ - void AddLegalIndexName(const char *idxName) - { - _legalIndexes.insert(idxName); - } - - /** * Parse the input string as a ';' separated list of index names and * index name prefixes. A '*' following a token in the list denotes @@ -98,27 +81,6 @@ public: * @return true if the given index name is legal. **/ bool IsLegalIndex(vespalib::stringref idx) const; - - - /** - * Extract keywords from a stack dump of a SimpleQueryStack. - * - * The words are extracted as follows: For AND and OR operators, all - * TERM items occuring in a legal index (the set of legal indexes is - * defined by invoking the @ref AddLegalIndex and @ref - * AddLegalIndexPrefix methods) are extracted. - * - * For PHRASE operators, the TERMS in a phrase are put together with - * space between them. - * - * @todo For NOT operators, only the first operand is considered. - * - * @param buf Pointer to buffer with simple query stack dump. - * @param bufLen Length of stack dump buffer - * @return Pointer to a buffer containing zero-terminated keywords, - * with an empty word at the end. - */ - char *ExtractKeywords(vespalib::stringref buf) const; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h index 13bd122362a..7fb94f48ac3 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h @@ -3,7 +3,6 @@ #pragma once #include "res_config_entry.h" -#include #include #include #include diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index 20ab1ccf325..2b88286d381 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp index e7532d2a25a..aa1cf124fee 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp @@ -4,6 +4,7 @@ #include "docsumconfig.h" #include "i_matching_elements_filler.h" #include +#include #include LOG_SETUP(".vsm.vsm-adapter"); -- cgit v1.2.3