diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-09-19 13:41:29 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-09-19 13:41:29 +0200 |
commit | 0164f1909840b51a6211afaa0b01017738ec494a (patch) | |
tree | d0422832ce70ac8fe152b01814c052dd6aec7147 /streamingvisitors/src | |
parent | 9282b402c6871048f4be628849f25d37497eddfb (diff) |
Use SlimeFiller instead of SlimeFieldWriter for streaming search.
Diffstat (limited to 'streamingvisitors/src')
7 files changed, 44 insertions, 401 deletions
diff --git a/streamingvisitors/src/tests/docsum/docsum.cpp b/streamingvisitors/src/tests/docsum/docsum.cpp index b7f45123c48..fc30a0b1239 100644 --- a/streamingvisitors/src/tests/docsum/docsum.cpp +++ b/streamingvisitors/src/tests/docsum/docsum.cpp @@ -5,8 +5,8 @@ #include <vespa/document/datatype/weightedsetdatatype.h> #include <vespa/document/datatype/mapdatatype.h> #include <vespa/vsm/common/docsum.h> +#include <vespa/vsm/common/storagedocument.h> #include <vespa/vsm/vsm/flattendocsumwriter.h> -#include <vespa/vsm/vsm/slimefieldwriter.h> #include <vespa/vespalib/data/smart_buffer.h> #include <vespa/vespalib/data/slime/slime.h> @@ -59,15 +59,7 @@ private: assertFlattenDocsumWriter(fdw, fv, exp); } void assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValue & fv, const std::string & exp); - void assertSlimeFieldWriter(const FieldValue & fv, const std::string & exp) { - SlimeFieldWriter sfw; - TEST_DO(assertSlimeFieldWriter(sfw, fv, exp)); - } - void assertSlimeFieldWriter(SlimeFieldWriter & sfw, const FieldValue & fv, const std::string & exp); - void testFlattenDocsumWriter(); - void testSlimeFieldWriter(); - void requireThatSlimeFieldWriterHandlesMap(); void testDocSumCache(); public: @@ -107,32 +99,6 @@ DocsumTest::assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValu } void -convert(SlimeFieldWriter & sfw, const document::FieldValue & fv, vespalib::Output & output) -{ - vespalib::Slime slime; - vespalib::slime::SlimeInserter inserter(slime); - sfw.insert(fv, inserter); - vespalib::slime::BinaryFormat::encode(slime, output); -} - -void -DocsumTest::assertSlimeFieldWriter(SlimeFieldWriter & sfw, const FieldValue & fv, const std::string & exp) -{ - vespalib::SmartBuffer buffer(1024); - convert(sfw, fv, buffer); - - vespalib::Slime gotSlime; - vespalib::Memory serialized(buffer.obtain()); - size_t decodeRes = vespalib::slime::BinaryFormat::decode(serialized, gotSlime); - ASSERT_EQUAL(decodeRes, serialized.size); - - vespalib::Slime expSlime; - size_t used = vespalib::slime::JsonFormat::decode(exp, expSlime); - EXPECT_TRUE(used > 0); - EXPECT_EQUAL(expSlime, gotSlime); -} - -void DocsumTest::testFlattenDocsumWriter() { { // basic tests @@ -169,132 +135,12 @@ DocsumTest::testFlattenDocsumWriter() } } -void -DocsumTest::testSlimeFieldWriter() -{ - { // basic types - assertSlimeFieldWriter(LongFieldValue(123456789), "123456789"); - assertSlimeFieldWriter(BoolFieldValue(true), "true"); - assertSlimeFieldWriter(BoolFieldValue(false), "false"); - assertSlimeFieldWriter(DoubleFieldValue(12.34), "12.34"); - assertSlimeFieldWriter(StringFieldValue("foo bar"), "\"foo bar\""); - } - { // collection field values - assertSlimeFieldWriter(createFieldValue(StringList().add("foo").add("bar").add("baz")), - "[\"foo\",\"bar\",\"baz\"]"); - assertSlimeFieldWriter(createFieldValue(WeightedStringList().add(std::make_pair("bar", 20)). - add(std::make_pair("baz", 30)). - add(std::make_pair("foo", 10))), - "[{item:\"bar\",weight:20},{item:\"baz\",weight:30},{item:\"foo\",weight:10}]"); - } - { // struct field value - StructDataType subType("substruct"); - Field fd("d", 0, *DataType::STRING); - Field fe("e", 1, *DataType::STRING); - subType.addField(fd); - subType.addField(fe); - StructFieldValue subValue(subType); - subValue.setValue(fd, StringFieldValue("baz")); - subValue.setValue(fe, StringFieldValue("qux")); - - StructDataType type("struct"); - Field fa("a", 0, *DataType::STRING); - Field fb("b", 1, *DataType::STRING); - Field fc("c", 2, subType); - type.addField(fa); - type.addField(fb); - type.addField(fc); - StructFieldValue value(type); - value.setValue(fa, StringFieldValue("foo")); - value.setValue(fb, StringFieldValue("bar")); - value.setValue(fc, subValue); - - - { // select a subset and then all - SlimeFieldWriter sfw; - DocsumFieldSpec::FieldIdentifierVector fields; - { - FieldPath path; - type.buildFieldPath(path, "a"); - fields.push_back(DocsumFieldSpec::FieldIdentifier(0, std::move(path))); - } - { - FieldPath path; - type.buildFieldPath(path, "c.e"); - fields.push_back(DocsumFieldSpec::FieldIdentifier(0, std::move(path))); - } - sfw.setInputFields(fields); - TEST_DO(assertSlimeFieldWriter(sfw, value, "{\"a\":\"foo\",\"c\":{\"e\":\"qux\"}}")); - sfw.clear(); - TEST_DO(assertSlimeFieldWriter(sfw, value, "{\"a\":\"foo\",\"b\":\"bar\",\"c\":{\"d\":\"baz\",\"e\":\"qux\"}}")); - } - - { // multiple invocations - SlimeFieldWriter sfw; - TEST_DO(assertSlimeFieldWriter(sfw, StringFieldValue("foo"), "\"foo\"")); - sfw.clear(); - TEST_DO(assertSlimeFieldWriter(sfw, StringFieldValue("bar"), "\"bar\"")); - sfw.clear(); - TEST_DO(assertSlimeFieldWriter(sfw, StringFieldValue("baz"), "\"baz\"")); - } - - } -} - -void -DocsumTest::requireThatSlimeFieldWriterHandlesMap() -{ - { // map<string, string> - MapDataType mapType(*DataType::STRING, *DataType::STRING); - MapFieldValue mapfv(mapType); - EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), StringFieldValue("v1"))); - EXPECT_TRUE(mapfv.put(StringFieldValue("k2"), StringFieldValue("v2"))); - assertSlimeFieldWriter(mapfv, "[{\"key\":\"k1\",\"value\":\"v1\"},{\"key\":\"k2\",\"value\":\"v2\"}]"); - } - { // map<string, struct> - StructDataType structType("struct"); - Field fa("a", 0, *DataType::STRING); - Field fb("b", 1, *DataType::STRING); - structType.addField(fa); - structType.addField(fb); - StructFieldValue structValue(structType); - structValue.setValue(fa, StringFieldValue("foo")); - structValue.setValue(fb, StringFieldValue("bar")); - MapDataType mapType(*DataType::STRING, structType); - MapFieldValue mapfv(mapType); - EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), structValue)); - { // select a subset and then all - SlimeFieldWriter sfw; - DocsumFieldSpec::FieldIdentifierVector fields; - { - FieldPath path; - mapType.buildFieldPath(path, "value.b"); - fields.push_back(DocsumFieldSpec::FieldIdentifier(0, std::move(path))); - } - sfw.setInputFields(fields); - TEST_DO(assertSlimeFieldWriter(sfw, mapfv, "[{\"key\":\"k1\",\"value\":{\"b\":\"bar\"}}]")); - { - FieldPath path; - mapType.buildFieldPath(path, "{k1}.a"); - fields[0] = DocsumFieldSpec::FieldIdentifier(0, std::move(path)); - } - sfw.clear(); - sfw.setInputFields(fields); - TEST_DO(assertSlimeFieldWriter(sfw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\"}}]")); - sfw.clear(); // all fields implicit - TEST_DO(assertSlimeFieldWriter(sfw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\",\"b\":\"bar\"}}]")); - } - } -} - int DocsumTest::Main() { TEST_INIT("docsum_test"); TEST_DO(testFlattenDocsumWriter()); - TEST_DO(testSlimeFieldWriter()); - TEST_DO(requireThatSlimeFieldWriterHandlesMap()); TEST_DONE(); } diff --git a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt index 1e766baf0ed..cf121aead4b 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt +++ b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt @@ -6,7 +6,6 @@ vespa_add_library(vsm_vsmbase OBJECT docsum_field_writer_factory.cpp fieldsearchspec.cpp flattendocsumwriter.cpp - slimefieldwriter.cpp snippetmodifier.cpp vsm-adapter.cpp DEPENDS diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp index 936aaaa2091..3d689b385b3 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp @@ -1,5 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + #include "docsumfieldspec.h" +#include <vespa/searchsummary/docsummary/slime_filler_filter.h> namespace vsm { @@ -21,7 +23,8 @@ DocsumFieldSpec::DocsumFieldSpec() : _resultType(search::docsummary::RES_INT), _command(VsmsummaryConfig::Fieldmap::Command::NONE), _outputField(), - _inputFields() + _inputFields(), + _filter() { } DocsumFieldSpec::DocsumFieldSpec(search::docsummary::ResType resultType, @@ -29,7 +32,24 @@ DocsumFieldSpec::DocsumFieldSpec(search::docsummary::ResType resultType, _resultType(resultType), _command(command), _outputField(), - _inputFields() + _inputFields(), + _filter() { } +DocsumFieldSpec::DocsumFieldSpec(DocsumFieldSpec&&) noexcept = default; + +DocsumFieldSpec::~DocsumFieldSpec() = default; + +void +DocsumFieldSpec::set_filter(std::unique_ptr<search::docsummary::SlimeFillerFilter> filter) +{ + _filter = std::move(filter); +} + +const search::docsummary::SlimeFillerFilter * +DocsumFieldSpec::get_filter() const noexcept +{ + return _filter.get(); +} + } diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h index db6ee9fa223..e08ae8c6e71 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h @@ -6,6 +6,8 @@ #include <vespa/vsm/common/storagedocument.h> #include <vespa/vsm/config/vsm-cfif.h> +namespace search::docsummary { class SlimeFillerFilter; } + namespace vsm { /** @@ -40,10 +42,13 @@ private: VsmsummaryConfig::Fieldmap::Command _command; FieldIdentifier _outputField; FieldIdentifierVector _inputFields; + std::unique_ptr<search::docsummary::SlimeFillerFilter> _filter; public: DocsumFieldSpec(); DocsumFieldSpec(search::docsummary::ResType resultType, VsmsummaryConfig::Fieldmap::Command command); + DocsumFieldSpec(DocsumFieldSpec&&) noexcept; + ~DocsumFieldSpec(); /** * Returns the result type for the summary field. @@ -66,6 +71,8 @@ public: void setOutputField(FieldIdentifier outputField) { _outputField = std::move(outputField); } const FieldIdentifierVector & getInputFields() const { return _inputFields; } FieldIdentifierVector & getInputFields() { return _inputFields; } + void set_filter(std::unique_ptr<search::docsummary::SlimeFillerFilter> filter); + const search::docsummary::SlimeFillerFilter *get_filter() const noexcept; }; } diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp index ca1de0082f0..534f8b92445 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp @@ -1,15 +1,18 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "docsumfilter.h" -#include "slimefieldwriter.h" #include <vespa/juniper/juniper_separators.h> #include <vespa/searchsummary/docsummary/check_undefined_value_visitor.h> #include <vespa/searchsummary/docsummary/i_docsum_store_document.h> #include <vespa/searchsummary/docsummary/i_juniper_converter.h> #include <vespa/searchsummary/docsummary/i_string_field_converter.h> +#include <vespa/searchsummary/docsummary/slime_filler.h> +#include <vespa/searchsummary/docsummary/slime_filler_filter.h> #include <vespa/searchsummary/docsummary/summaryfieldconverter.h> #include <vespa/document/base/exceptions.h> +#include <vespa/document/datatype/datatype.h> #include <vespa/document/fieldvalue/iteratorhandler.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> #include <vespa/vespalib/data/slime/inserter.h> #include <vespa/log/log.h> @@ -213,6 +216,10 @@ DocsumFilter::prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldS } } // setup input fields + std::unique_ptr<SlimeFillerFilter> filter; + if (spec.getResultType() == RES_JSONSTRING) { + filter = std::make_unique<SlimeFillerFilter>(); + } for (size_t i = 0; i < toolsSpec.getInputNames().size(); ++i) { const vespalib::string & name = toolsSpec.getInputNames()[i]; LOG(debug, "prepareFieldSpec: input field name '%s'", name.c_str()); @@ -231,6 +238,10 @@ DocsumFilter::prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldS } else { LOG(warning, "Could not find input summary field '%s'", name.c_str()); } + SlimeFillerFilter::add_remaining(filter, name); + } + if (filter && !filter->empty()) { + spec.set_filter(std::move(filter)); } } @@ -407,11 +418,8 @@ DocsumFilter::insert_struct_or_multivalue_summary_field(const DocsumFieldSpec& f CheckUndefinedValueVisitor check_undefined; fv->accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFieldWriter writer; - if (! field_spec.hasIdentityMapping()) { - writer.setInputFields(field_spec.getInputFields()); - } - writer.insert(*fv, inserter); + SlimeFiller writer(inserter, nullptr, field_spec.get_filter()); + fv->accept(writer); } } diff --git a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp b/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp deleted file mode 100644 index f0278bb3470..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "slimefieldwriter.h" -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/size_literals.h> -#include <vespa/searchsummary/docsummary/resultconfig.h> -#include <vespa/document/datatype/positiondatatype.h> -#include <vespa/vespalib/data/slime/slime.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.slimefieldwriter"); - -namespace { - -vespalib::string -toString(const std::vector<vespalib::string> & fieldPath) -{ - vespalib::asciistream oss; - for (size_t i = 0; i < fieldPath.size(); ++i) { - if (i > 0) { - oss << "."; - } - oss << fieldPath[i]; - } - return oss.str(); -} - -} // namespace <unnamed> - -using namespace vespalib::slime::convenience; - -namespace vsm { - -void -SlimeFieldWriter::traverseRecursive(const document::FieldValue & fv, Inserter &inserter) -{ - LOG(debug, "traverseRecursive: class(%s), fieldValue(%s), currentPath(%s)", - fv.className(), fv.toString().c_str(), toString(_currPath).c_str()); - - if (fv.isCollection()) { - const auto & cfv = static_cast<const document::CollectionFieldValue &>(fv); - if (cfv.isA(document::FieldValue::Type::ARRAY)) { - const auto & afv = static_cast<const document::ArrayFieldValue &>(cfv); - Cursor &a = inserter.insertArray(); - for (const auto & nfv : afv) { - ArrayInserter ai(a); - traverseRecursive(nfv, ai); - } - } else { - assert(cfv.isA(document::FieldValue::Type::WSET)); - const auto & wsfv = static_cast<const document::WeightedSetFieldValue &>(cfv); - Cursor &a = inserter.insertArray(); - Symbol isym = a.resolve("item"); - Symbol wsym = a.resolve("weight"); - for (const auto &entry : wsfv) { - Cursor &o = a.addObject(); - const document::FieldValue & nfv = *entry.first; - ObjectSymbolInserter oi(o, isym); - traverseRecursive(nfv, oi); - int weight = static_cast<const document::IntFieldValue &>(*entry.second).getValue(); - o.setLong(wsym, weight); - } - } - } else if (fv.isA(document::FieldValue::Type::MAP)) { - const auto & mfv = static_cast<const document::MapFieldValue &>(fv); - Cursor &a = inserter.insertArray(); - Symbol keysym = a.resolve("key"); - Symbol valsym = a.resolve("value"); - for (const auto &entry : mfv) { - Cursor &o = a.addObject(); - ObjectSymbolInserter ki(o, keysym); - traverseRecursive(*entry.first, ki); - _currPath.emplace_back("value"); - ObjectSymbolInserter vi(o, valsym); - traverseRecursive(*entry.second, vi); - _currPath.pop_back(); - } - } else if (fv.isStructured()) { - const auto & sfv = static_cast<const document::StructuredFieldValue &>(fv); - Cursor &o = inserter.insertObject(); - if (sfv.getDataType() == &document::PositionDataType::getInstance() - && search::docsummary::ResultConfig::wantedV8geoPositions()) - { - bool ok = true; - try { - int x = std::numeric_limits<int>::min(); - int y = std::numeric_limits<int>::min(); - for (const document::Field & entry : sfv) { - document::FieldValue::UP fval(sfv.getValue(entry)); - if (entry.getName() == "x") { - x = fval->getAsInt(); - } else if (entry.getName() == "y") { - y = fval->getAsInt(); - } else { - ok = false; - } - } - if (x == std::numeric_limits<int>::min()) ok = false; - if (y == std::numeric_limits<int>::min()) ok = false; - if (ok) { - o.setDouble("lat", double(y) / 1.0e6); - o.setDouble("lng", double(x) / 1.0e6); - return; - } - } catch (std::exception &e) { - (void)e; - // fallback to code below - } - } - for (const document::Field & entry : sfv) { - if (explorePath(entry.getName())) { - _currPath.push_back(entry.getName()); - Memory keymem(entry.getName()); - ObjectInserter oi(o, keymem); - document::FieldValue::UP fval(sfv.getValue(entry)); - traverseRecursive(*fval, oi); - _currPath.pop_back(); - } - } - } else { - if (fv.isLiteral()) { - const auto & lfv = static_cast<const document::LiteralFieldValueB &>(fv); - inserter.insertString(lfv.getValueRef()); - } else if (fv.isNumeric()) { - switch (fv.getDataType()->getId()) { - case document::DataType::T_BYTE: - case document::DataType::T_SHORT: - case document::DataType::T_INT: - case document::DataType::T_LONG: - inserter.insertLong(fv.getAsLong()); - break; - case document::DataType::T_DOUBLE: - inserter.insertDouble(fv.getAsDouble()); - break; - case document::DataType::T_FLOAT: - inserter.insertDouble(fv.getAsFloat()); - break; - default: - inserter.insertString(fv.getAsString()); - } - } else if (fv.isA(document::FieldValue::Type::BOOL)) { - const auto & bfv = static_cast<const document::BoolFieldValue &>(fv); - inserter.insertBool(bfv.getValue()); - } else { - inserter.insertString(fv.toString()); - } - } -} - -bool -SlimeFieldWriter::explorePath(vespalib::stringref candidate) -{ - if (_inputFields == nullptr) { - return true; - } - // find out if we should explore the current path - for (const auto & field : *_inputFields) { - const FieldPath & fp = field.getPath(); - if (_currPath.size() <= fp.size()) { - bool equal = true; - for (size_t j = 0; j < _currPath.size() && equal; ++j) { - equal = (fp[j].getName() == _currPath[j]); - } - if (equal) { - if (_currPath.size() == fp.size()) { - return true; - } else if (fp[_currPath.size()].getName() == candidate) { - // the current path matches one of the input field paths - return true; - } - } - } - } - return false; -} - -SlimeFieldWriter::SlimeFieldWriter() : - _inputFields(nullptr), - _currPath() -{ -} - -SlimeFieldWriter::~SlimeFieldWriter() = default; - -void -SlimeFieldWriter::insert(const document::FieldValue & fv, vespalib::slime::Inserter& inserter) -{ - traverseRecursive(fv, inserter); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h b/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h deleted file mode 100644 index 9f72e4f3687..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "docsumfieldspec.h" -#include <vespa/vsm/common/storagedocument.h> -#include <vespa/document/fieldvalue/fieldvalues.h> - -namespace vespalib::slime { struct Inserter; } - -namespace vsm { - -/** - * This class is used to write a field value as slime binary data. - * If only a subset of the field value should be written this subset - * is specified using the setInputFields() function. - **/ -class SlimeFieldWriter -{ -private: - const DocsumFieldSpec::FieldIdentifierVector * _inputFields; - std::vector<vespalib::string> _currPath; - - void traverseRecursive(const document::FieldValue & fv, vespalib::slime::Inserter & inserter); - bool explorePath(vespalib::stringref candidate); - -public: - SlimeFieldWriter(); - ~SlimeFieldWriter(); - - /** - * Specifies the subset of the field value that should be written. - **/ - void setInputFields(const DocsumFieldSpec::FieldIdentifierVector & inputFields) { _inputFields = &inputFields; } - - /** - * Insert the given field value - **/ - void insert(const document::FieldValue & fv, vespalib::slime::Inserter& inserter); - - void clear() { - _inputFields = nullptr; - _currPath.clear(); - } -}; - -} |