summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java 32
-rw-r--r-- config-model/src/test/derived/importedfields/child.sd 4
-rw-r--r-- config-model/src/test/derived/importedfields/summary.cfg 4
-rw-r--r-- config-model/src/test/derived/importedfields/summarymap.cfg 3
-rw-r--r-- config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java 45
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/logdatastore.cpp 24
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/logdatastore.h 6
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp 6
9 files changed, 109 insertions, 17 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java
index 8bb834d4697..d86ed265b77 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolver.java
@@ -2,8 +2,13 @@
package com.yahoo.searchdefinition.processing;
import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.document.ArrayDataType;
+import com.yahoo.document.DataType;
+import com.yahoo.document.WeightedSetDataType;
import com.yahoo.searchdefinition.RankProfileRegistry;
import com.yahoo.searchdefinition.Search;
+import com.yahoo.searchdefinition.document.ComplexAttributeFieldUtils;
+import com.yahoo.searchdefinition.document.ImmutableSDField;
import com.yahoo.vespa.documentmodel.DocumentSummary;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
@@ -43,15 +48,40 @@ public class MatchedElementsOnlyResolver extends Processor {
if (isComplexFieldWithOnlyStructFieldAttributes(sourceField)) {
field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER);
}
+ } else if (isSupportedAttributeField(sourceField)) {
+ field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER);
} else if (validate) {
fail(summary, field, "'matched-elements-only' is not supported for this field type. " +
- "Supported field types are array of simple struct, map of primitive type to simple struct, " +
+ "Supported field types are: array attribute, weighted set attribute, " +
+ "array of simple struct, map of primitive type to simple struct, " +
"and map of primitive type to primitive type");
}
}
// else case is handled in SummaryFieldsMustHaveValidSource
}
+ private boolean isSupportedAttributeField(ImmutableSDField sourceField) { // Decides whether the caller may apply MATCHED_ATTRIBUTE_ELEMENTS_FILTER to a non-complex source field.
+ var type = sourceField.getDataType();
+ return sourceField.doesAttributing() && // the field must do attributing; the tests use 'indexing: attribute' for the accepted case
+ (isArrayOfPrimitiveType(type) || isWeightedsetOfPrimitiveType(type)); // and be an array or weighted set whose nested type is primitive
+ }
+
+ private boolean isArrayOfPrimitiveType(DataType type) { // True only for an ArrayDataType whose nested type passes ComplexAttributeFieldUtils.isPrimitiveType.
+ if (type instanceof ArrayDataType) {
+ var arrayType = (ArrayDataType) type;
+ return ComplexAttributeFieldUtils.isPrimitiveType(arrayType.getNestedType()); // the element type decides the result
+ }
+ return false; // not an array type at all
+ }
+
+ private boolean isWeightedsetOfPrimitiveType(DataType type) { // True only for a WeightedSetDataType whose nested type passes ComplexAttributeFieldUtils.isPrimitiveType.
+ if (type instanceof WeightedSetDataType) {
+ var wsetType = (WeightedSetDataType) type;
+ return ComplexAttributeFieldUtils.isPrimitiveType(wsetType.getNestedType()); // the element type decides the result
+ }
+ return false; // not a weighted set type at all
+ }
+
private void fail(DocumentSummary summary, SummaryField field, String msg) {
throw new IllegalArgumentException(formatError(search, summary, field, msg));
}
diff --git a/config-model/src/test/derived/importedfields/child.sd b/config-model/src/test/derived/importedfields/child.sd
index d541ba8fc8c..95fe44d333a 100644
--- a/config-model/src/test/derived/importedfields/child.sd
+++ b/config-model/src/test/derived/importedfields/child.sd
@@ -23,5 +23,9 @@ search child {
summary my_int_array_field type array<int> {}
summary my_int_wset_field type weightedset<int> {}
summary my_ancient_int_field type int {}
+ summary my_filtered_int_array_field type array<int> {
+ source: my_int_array_field
+ matched-elements-only
+ }
}
}
diff --git a/config-model/src/test/derived/importedfields/summary.cfg b/config-model/src/test/derived/importedfields/summary.cfg
index 68ad270a314..f95949cfa62 100644
--- a/config-model/src/test/derived/importedfields/summary.cfg
+++ b/config-model/src/test/derived/importedfields/summary.cfg
@@ -9,7 +9,7 @@ classes[].fields[].name "summaryfeatures"
classes[].fields[].type "featuredata"
classes[].fields[].name "documentid"
classes[].fields[].type "longstring"
-classes[].id 1660388492
+classes[].id 159551552
classes[].name "mysummary"
classes[].fields[].name "a_ref"
classes[].fields[].type "longstring"
@@ -25,6 +25,8 @@ classes[].fields[].name "my_int_wset_field"
classes[].fields[].type "jsonstring"
classes[].fields[].name "my_ancient_int_field"
classes[].fields[].type "integer"
+classes[].fields[].name "my_filtered_int_array_field"
+classes[].fields[].type "jsonstring"
classes[].fields[].name "rankfeatures"
classes[].fields[].type "featuredata"
classes[].fields[].name "summaryfeatures"
diff --git a/config-model/src/test/derived/importedfields/summarymap.cfg b/config-model/src/test/derived/importedfields/summarymap.cfg
index d038e2313a8..03ae5e2676e 100644
--- a/config-model/src/test/derived/importedfields/summarymap.cfg
+++ b/config-model/src/test/derived/importedfields/summarymap.cfg
@@ -14,6 +14,9 @@ override[].arguments "my_int_wset_field"
override[].field "my_ancient_int_field"
override[].command "attribute"
override[].arguments "my_ancient_int_field"
+override[].field "my_filtered_int_array_field"
+override[].command "matchedattributeelementsfilter"
+override[].arguments "my_int_array_field"
override[].field "rankfeatures"
override[].command "rankfeatures"
override[].arguments ""
diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java
index 3b6918c04ae..0ef696df6cf 100644
--- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java
+++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/MatchedElementsOnlyResolverTestCase.java
@@ -75,7 +75,7 @@ public class MatchedElementsOnlyResolverTestCase {
}
@Test
- public void explicit_summary_field_can_use_filter_transform_with_reference_to_source_field() throws ParseException {
+ public void explicit_complex_summary_field_can_use_filter_transform_with_reference_to_source_field() throws ParseException {
String documentSummary = joinLines("document-summary my_summary {",
" summary my_filter_field type map<string, string> {",
" source: my_field",
@@ -108,12 +108,51 @@ public class MatchedElementsOnlyResolverTestCase {
}
@Test
+ public void primitive_array_attribute_field_gets_attribute_transform() throws ParseException { // array<string> attribute with matched-elements-only
+ assertSummaryField(joinLines("field my_field type array<string> {",
+ " indexing: attribute | summary",
+ " summary: matched-elements-only",
+ "}"),
+ "my_field", SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); // expected transform on the field's own summary field
+ }
+
+ @Test
+ public void primitive_weighted_set_attribute_field_gets_attribute_transform() throws ParseException { // weightedset<string> attribute with matched-elements-only
+ assertSummaryField(joinLines("field my_field type weightedset<string> {",
+ " indexing: attribute | summary",
+ " summary: matched-elements-only",
+ "}"),
+ "my_field", SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); // expected transform on the field's own summary field
+ }
+
+ @Test
+ public void explicit_summary_field_can_use_filter_transform_with_reference_to_attribute_source_field() throws ParseException { // matched-elements-only set on an explicit summary field that sources an array attribute
+ String documentSummary = joinLines("document-summary my_summary {",
+ " summary my_filter_field type array<string> {",
+ " source: my_field",
+ " matched-elements-only",
+ " }",
+ "}");
+
+ var search = buildSearch(joinLines("field my_field type array<string> {",
+ " indexing: attribute | summary",
+ "}"),
+ documentSummary);
+ assertSummaryField(search.getSummaryField("my_filter_field"),
+ SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER, "my_field"); // the explicit field is filtered, with my_field as its source
+ assertSummaryField(search.getSummaryField("my_field"),
+ SummaryTransform.ATTRIBUTE, "my_field"); // the source field's own summary keeps the plain ATTRIBUTE transform
+ }
+
+ @Test
public void unsupported_field_type_throws() throws ParseException {
exceptionRule.expect(IllegalArgumentException.class);
exceptionRule.expectMessage("For search 'test', document summary 'default', summary field 'my_field': " +
"'matched-elements-only' is not supported for this field type. " +
- "Supported field types are array of simple struct, map of primitive type to simple struct, and map of primitive type to primitive type");
- buildSearch(joinLines("field my_field type string {",
+ "Supported field types are: array attribute, weighted set attribute, " +
+ "array of simple struct, map of primitive type to simple struct, " +
+ "and map of primitive type to primitive type");
+ buildSearch(joinLines("field my_field type array<string> {",
" indexing: summary",
" summary: matched-elements-only",
"}"));
diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp
index c8d0a572cd9..361fde48aa8 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp
@@ -128,6 +128,8 @@ void find_matching_elements(const MatchingElementsFields &fields, const std::vec
} else if (const AttrSearchCtx *attr_ctx = bp.get_attribute_search_context()) {
if (fields.has_struct_field(attr_ctx->attributeName())) {
find_matching_elements(docs, fields.get_enclosing_field(attr_ctx->attributeName()), *attr_ctx, result);
+ } else if (fields.has_field(attr_ctx->attributeName())) {
+ find_matching_elements(docs, attr_ctx->attributeName(), *attr_ctx, result);
}
} else if (auto and_not = as<AndNotBlueprint>(bp)) {
find_matching_elements(fields, docs, and_not->getChild(0), result);
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
index ae0696d5824..4adb3507eeb 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -303,7 +303,7 @@ LogDataStore::compact(uint64_t syncToken)
const bool doCompact = (_fileChunks.size() > 1);
if (doCompact) {
LOG(info, "%s. Will compact", bloatMsg(bloat, usage).c_str());
- compactWorst(_config.getMaxDiskBloatFactor(), _config.getMaxBucketSpread());
+ compactWorst(_config.getMaxDiskBloatFactor(), _config.getMaxBucketSpread(), isTotalDiskBloatExceeded(usage, bloat));
}
flushActiveAndWait(syncToken);
if (doCompact) {
@@ -313,16 +313,22 @@ LogDataStore::compact(uint64_t syncToken)
}
}
+bool
+LogDataStore::isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const { // True when bloat is above diskFootPrint scaled by the configured max disk bloat factor.
+ const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor(); // NOTE(review): the factor looks like a double (it is passed as 'double bloatLimit' elsewhere), so the product would be truncated to size_t - confirm
+ return bloat > maxConfiguredDiskBloat; // strictly greater: bloat equal to the limit is not treated as exceeded
+}
+
size_t
LogDataStore::getMaxCompactGain() const
{
- const size_t diskFootPrint = getDiskFootprint();
- const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor();
- double maxSpread = getMaxBucketSpread();
size_t bloat = getDiskBloat();
- if (bloat < maxConfiguredDiskBloat) {
+ const size_t diskFootPrint = getDiskFootprint();
+ if ( ! isTotalDiskBloatExceeded(diskFootPrint, bloat) ) {
bloat = 0;
}
+
+ const double maxSpread = getMaxBucketSpread();
size_t spreadAsBloat = diskFootPrint * (1.0 - 1.0/maxSpread);
if ( maxSpread < _config.getMaxBucketSpread()) {
spreadAsBloat = 0;
@@ -374,7 +380,7 @@ LogDataStore::getMaxBucketSpread() const
}
std::pair<bool, LogDataStore::FileId>
-LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit)
+LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat)
{
typedef std::multimap<double, FileId, std::greater<double>> CostMap;
CostMap worstBloat;
@@ -402,7 +408,7 @@ LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit)
}
}
std::pair<bool, FileId> retval(false, FileId(-1));
- if ( ! worstBloat.empty() && (worstBloat.begin()->first > bloatLimit)) {
+ if ( ! worstBloat.empty() && (worstBloat.begin()->first > bloatLimit) && prioritizeDiskBloat) {
retval.first = true;
retval.second = worstBloat.begin()->second;
} else if ( ! worstSpread.empty() && (worstSpread.begin()->first > spreadLimit)) {
@@ -416,8 +422,8 @@ LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit)
}
void
-LogDataStore::compactWorst(double bloatLimit, double spreadLimit) {
- auto worst = findNextToCompact(bloatLimit, spreadLimit);
+LogDataStore::compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat) {
+ auto worst = findNextToCompact(bloatLimit, spreadLimit, prioritizeDiskBloat);
if (worst.first) {
compactFile(worst.second);
}
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
index 7bd55599611..c709c607f37 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
@@ -187,7 +187,7 @@ private:
// Implements ISetLid API
void setLid(const LockGuard & guard, uint32_t lid, const LidInfo & lm) override;
- void compactWorst(double bloatLimit, double spreadLimit);
+ void compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat);
void compactFile(FileId chunkId);
typedef vespalib::RcuVector<uint64_t> LidInfoVector;
@@ -203,6 +203,8 @@ private:
NameIdSet eraseIncompleteCompactedFiles(NameIdSet partList);
void internalFlushAll();
+ bool isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const;
+
NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix);
FileId allocateFileId(const LockGuard & guard);
void setNewFileChunk(const LockGuard & guard, FileChunk::UP fileChunk);
@@ -266,7 +268,7 @@ private:
return (_fileChunks.empty() ? 0 : _fileChunks.back()->getLastPersistedSerialNum());
}
bool shouldCompactToActiveFile(size_t compactedSize) const;
- std::pair<bool, FileId> findNextToCompact(double bloatLimit, double spreadLimit);
+ std::pair<bool, FileId> findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat);
void incGeneration();
bool canShrinkLidSpace(const vespalib::LockGuard &guard) const;
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp
index f41ada8b2e8..1a269e556d6 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumconfig.cpp
@@ -102,7 +102,11 @@ DynamicDocsumConfig::createFieldWriter(const string & fieldName, const string &
string source_field = argument.empty() ? fieldName : argument;
if (getEnvironment() && getEnvironment()->getAttributeManager()) {
auto attr_ctx = getEnvironment()->getAttributeManager()->createContext();
- fieldWriter = AttributeCombinerDFW::create(source_field, *attr_ctx, true, matching_elems_fields);
+ if (attr_ctx->getAttribute(source_field) != nullptr) {
+ fieldWriter = AttributeDFWFactory::create(*getEnvironment()->getAttributeManager(), source_field, true, matching_elems_fields);
+ } else {
+ fieldWriter = AttributeCombinerDFW::create(source_field, *attr_ctx, true, matching_elems_fields);
+ }
rc = static_cast<bool>(fieldWriter);
}
} else if (overrideName == "matchedelementsfilter") {