diff options
9 files changed, 109 insertions, 17 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java index 8bb834d4697..d86ed265b77 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java @@ -2,8 +2,13 @@ package com.yahoo.searchdefinition.processing; import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.WeightedSetDataType; import com.yahoo.searchdefinition.RankProfileRegistry; import com.yahoo.searchdefinition.Search; +import com.yahoo.searchdefinition.document.ComplexAttributeFieldUtils; +import com.yahoo.searchdefinition.document.ImmutableSDField; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.vespa.documentmodel.SummaryTransform; @@ -43,15 +48,40 @@ public class MatchedElementsOnlyResolver extends Processor { if (isComplexFieldWithOnlyStructFieldAttributes(sourceField)) { field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); } + } else if (isSupportedAttributeField(sourceField)) { + field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); } else if (validate) { fail(summary, field, "'matched-elements-only' is not supported for this field type. " + - "Supported field types are array of simple struct, map of primitive type to simple struct, " + + "Supported field types are: array attribute, weighted set attribute, " + + "array of simple struct, map of primitive type to simple struct, " + "and map of primitive type to primitive type"); } } // else case is handled in SummaryFieldsMustHaveValidSource } + private boolean isSupportedAttributeField(ImmutableSDField sourceField) { + var type = sourceField.getDataType(); + return sourceField.doesAttributing() && + (isArrayOfPrimitiveType(type) || isWeightedsetOfPrimitiveType(type)); + } + + private boolean isArrayOfPrimitiveType(DataType type) { + if (type instanceof ArrayDataType) { + var arrayType = (ArrayDataType) type; + return ComplexAttributeFieldUtils.isPrimitiveType(arrayType.getNestedType()); + } + return false; + } + + private boolean isWeightedsetOfPrimitiveType(DataType type) { + if (type instanceof WeightedSetDataType) { + var wsetType = (WeightedSetDataType) type; + return ComplexAttributeFieldUtils.isPrimitiveType(wsetType.getNestedType()); + } + return false; + } + private void fail(DocumentSummary summary, SummaryField field, String msg) { throw new IllegalArgumentException(formatError(search, summary, field, msg)); } diff --git a/config-model/src/test/derived/importedfields/child.sd b/config-model/src/test/derived/importedfields/child.sd index d541ba8fc8c..95fe44d333a 100644 --- a/config-model/src/test/derived/importedfields/child.sd +++ b/config-model/src/test/derived/importedfields/child.sd @@ -23,5 +23,9 @@ search child { summary my_int_array_field type array<int> {} summary my_int_wset_field type weightedset<int> {} summary my_ancient_int_field type int {} + summary my_filtered_int_array_field type array<int> { + source: my_int_array_field + matched-elements-only + } } } diff --git a/config-model/src/test/derived/importedfields/summary.cfg b/config-model/src/test/derived/importedfields/summary.cfg index 68ad270a314..f95949cfa62 100644 --- a/config-model/src/test/derived/importedfields/summary.cfg +++ b/config-model/src/test/derived/importedfields/summary.cfg @@ -9,7 +9,7 @@ classes[].fields[].name "summaryfeatures" classes[].fields[].type "featuredata" classes[].fields[].name "documentid" classes[].fields[].type "longstring" -classes[].id 1660388492 +classes[].id 159551552 classes[].name "mysummary" classes[].fields[].name "a_ref" classes[].fields[].type "longstring" @@ -25,6 +25,8 @@ classes[].fields[].name "my_int_wset_field" classes[].fields[].type "jsonstring" classes[].fields[].name "my_ancient_int_field" classes[].fields[].type "integer" +classes[].fields[].name "my_filtered_int_array_field" +classes[].fields[].type "jsonstring" classes[].fields[].name "rankfeatures" classes[].fields[].type "featuredata" classes[].fields[].name "summaryfeatures" diff --git a/config-model/src/test/derived/importedfields/summarymap.cfg b/config-model/src/test/derived/importedfields/summarymap.cfg index d038e2313a8..03ae5e2676e 100644 --- a/config-model/src/test/derived/importedfields/summarymap.cfg +++ b/config-model/src/test/derived/importedfields/summarymap.cfg @@ -14,6 +14,9 @@ override[].arguments "my_int_wset_field" override[].field "my_ancient_int_field" override[].command "attribute" override[].arguments "my_ancient_int_field" +override[].field "my_filtered_int_array_field" +override[].command "matchedattributeelementsfilter" +override[].arguments "my_int_array_field" override[].field "rankfeatures" override[].command "rankfeatures" override[].arguments "" diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java index 3b6918c04ae..0ef696df6cf 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java @@ -75,7 +75,7 @@ public class MatchedElementsOnlyResolverTestCase { } @Test - public void explicit_summary_field_can_use_filter_transform_with_reference_to_source_field() throws ParseException { + public void explicit_complex_summary_field_can_use_filter_transform_with_reference_to_source_field() throws ParseException { String documentSummary = joinLines("document-summary my_summary {", " summary my_filter_field type map<string, string> {", " source: my_field", @@ -108,12 +108,51 @@ public class MatchedElementsOnlyResolverTestCase { } @Test + public void primitive_array_attribute_field_gets_attribute_transform() throws ParseException { + assertSummaryField(joinLines("field my_field type array<string> {", + " indexing: attribute | summary", + " summary: matched-elements-only", + "}"), + "my_field", SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); + } + + @Test + public void primitive_weighted_set_attribute_field_gets_attribute_transform() throws ParseException { + assertSummaryField(joinLines("field my_field type weightedset<string> {", + " indexing: attribute | summary", + " summary: matched-elements-only", + "}"), + "my_field", SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); + } + + @Test + public void explicit_summary_field_can_use_filter_transform_with_reference_to_attribute_source_field() throws ParseException { + String documentSummary = joinLines("document-summary my_summary {", + " summary my_filter_field type array<string> {", + " source: my_field", + " matched-elements-only", + " }", + "}"); + + var search = buildSearch(joinLines("field my_field type array<string> {", + " indexing: attribute | summary", + "}"), + documentSummary); + assertSummaryField(search.getSummaryField("my_filter_field"), + SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER, "my_field"); + assertSummaryField(search.getSummaryField("my_field"), + SummaryTransform.ATTRIBUTE, "my_field"); + } + + @Test public void unsupported_field_type_throws() throws ParseException { exceptionRule.expect(IllegalArgumentException.class); exceptionRule.expectMessage("For search 'test', document summary 'default', summary field 'my_field': " + "'matched-elements-only' is not supported for this field type. " + - "Supported field types are array of simple struct, map of primitive type to simple struct, and map of primitive type to primitive type"); - buildSearch(joinLines("field my_field type string {", + "Supported field types are: array attribute, weighted set attribute, " + + "array of simple struct, map of primitive type to simple struct, " + + "and map of primitive type to primitive type"); + buildSearch(joinLines("field my_field type array<string> {", " indexing: summary", " summary: matched-elements-only", "}")); diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp index c8d0a572cd9..361fde48aa8 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp @@ -128,6 +128,8 @@ void find_matching_elements(const MatchingElementsFields &fields, const std::vec } else if (const AttrSearchCtx *attr_ctx = bp.get_attribute_search_context()) { if (fields.has_struct_field(attr_ctx->attributeName())) { find_matching_elements(docs, fields.get_enclosing_field(attr_ctx->attributeName()), *attr_ctx, result); + } else if (fields.has_field(attr_ctx->attributeName())) { + find_matching_elements(docs, attr_ctx->attributeName(), *attr_ctx, result); } } else if (auto and_not = as<AndNotBlueprint>(bp)) { find_matching_elements(fields, docs, and_not->getChild(0), result); diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index ae0696d5824..4adb3507eeb 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -303,7 +303,7 @@ LogDataStore::compact(uint64_t syncToken) const bool doCompact = (_fileChunks.size() > 1); if (doCompact) { LOG(info, "%s. Will compact", bloatMsg(bloat, usage).c_str()); - compactWorst(_config.getMaxDiskBloatFactor(), _config.getMaxBucketSpread()); + compactWorst(_config.getMaxDiskBloatFactor(), _config.getMaxBucketSpread(), isTotalDiskBloatExceeded(usage, bloat)); } flushActiveAndWait(syncToken); if (doCompact) { @@ -313,16 +313,22 @@ LogDataStore::compact(uint64_t syncToken) } } +bool +LogDataStore::isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const { + const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor(); + return bloat > maxConfiguredDiskBloat; +} + size_t LogDataStore::getMaxCompactGain() const { - const size_t diskFootPrint = getDiskFootprint(); - const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor(); - double maxSpread = getMaxBucketSpread(); size_t bloat = getDiskBloat(); - if (bloat < maxConfiguredDiskBloat) { + const size_t diskFootPrint = getDiskFootprint(); + if ( ! isTotalDiskBloatExceeded(diskFootPrint, bloat) ) { bloat = 0; } + + const double maxSpread = getMaxBucketSpread(); size_t spreadAsBloat = diskFootPrint * (1.0 - 1.0/maxSpread); if ( maxSpread < _config.getMaxBucketSpread()) { spreadAsBloat = 0; @@ -374,7 +380,7 @@ LogDataStore::getMaxBucketSpread() const } std::pair<bool, LogDataStore::FileId> -LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit) +LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat) { typedef std::multimap<double, FileId, std::greater<double>> CostMap; CostMap worstBloat; @@ -402,7 +408,7 @@ LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit) } } std::pair<bool, FileId> retval(false, FileId(-1)); - if ( ! worstBloat.empty() && (worstBloat.begin()->first > bloatLimit)) { + if ( ! worstBloat.empty() && (worstBloat.begin()->first > bloatLimit) && prioritizeDiskBloat) { retval.first = true; retval.second = worstBloat.begin()->second; } else if ( ! worstSpread.empty() && (worstSpread.begin()->first > spreadLimit)) { @@ -416,8 +422,8 @@ LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit) } void -LogDataStore::compactWorst(double bloatLimit, double spreadLimit) { - auto worst = findNextToCompact(bloatLimit, spreadLimit); +LogDataStore::compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat) { + auto worst = findNextToCompact(bloatLimit, spreadLimit, prioritizeDiskBloat); if (worst.first) { compactFile(worst.second); } diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h index 7bd55599611..c709c607f37 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h @@ -187,7 +187,7 @@ private: // Implements ISetLid API void setLid(const LockGuard & guard, uint32_t lid, const LidInfo & lm) override; - void compactWorst(double bloatLimit, double spreadLimit); + void compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat); void compactFile(FileId chunkId); typedef vespalib::RcuVector<uint64_t> LidInfoVector; @@ -203,6 +203,8 @@ private: NameIdSet eraseIncompleteCompactedFiles(NameIdSet partList); void internalFlushAll(); + bool isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const; + NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix); FileId allocateFileId(const LockGuard & guard); void setNewFileChunk(const LockGuard & guard, FileChunk::UP fileChunk); @@ -266,7 +268,7 @@ private: return (_fileChunks.empty() ? 0 : _fileChunks.back()->getLastPersistedSerialNum()); } bool shouldCompactToActiveFile(size_t compactedSize) const; - std::pair<bool, FileId> findNextToCompact(double bloatLimit, double spreadLimit); + std::pair<bool, FileId> findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat); void incGeneration(); bool canShrinkLidSpace(const vespalib::LockGuard &guard) const; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp index f41ada8b2e8..1a269e556d6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp @@ -102,7 +102,11 @@ DynamicDocsumConfig::createFieldWriter(const string & fieldName, const string & string source_field = argument.empty() ? fieldName : argument; if (getEnvironment() && getEnvironment()->getAttributeManager()) { auto attr_ctx = getEnvironment()->getAttributeManager()->createContext(); - fieldWriter = AttributeCombinerDFW::create(source_field, *attr_ctx, true, matching_elems_fields); + if (attr_ctx->getAttribute(source_field) != nullptr) { + fieldWriter = AttributeDFWFactory::create(*getEnvironment()->getAttributeManager(), source_field, true, matching_elems_fields); + } else { + fieldWriter = AttributeCombinerDFW::create(source_field, *attr_ctx, true, matching_elems_fields); + } rc = static_cast<bool>(fieldWriter); } } else if (overrideName == "matchedelementsfilter") { |