diff options
Diffstat (limited to 'container-search/src')
26 files changed, 563 insertions, 261 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java index 3cf86a70985..b900dee20ba 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java @@ -16,15 +16,21 @@ public class FuzzyItem extends TermItem { private int maxEditDistance; private int prefixLength; + private boolean prefixMatch; public static int DEFAULT_MAX_EDIT_DISTANCE = 2; public static int DEFAULT_PREFIX_LENGTH = 0; - public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength) { + public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength, boolean prefixMatch) { super(indexName, isFromQuery, null); setValue(term); setMaxEditDistance(maxEditDistance); setPrefixLength(prefixLength); + setPrefixMatch(prefixMatch); + } + + public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength) { + this(indexName, isFromQuery, term, maxEditDistance, prefixLength, false); } public void setMaxEditDistance(int maxEditDistance) { @@ -43,6 +49,19 @@ public class FuzzyItem extends TermItem { return this.maxEditDistance; } + public boolean isPrefixMatch() { + return this.prefixMatch; + } + + public void setPrefixMatch(boolean prefixMatch) { + this.prefixMatch = prefixMatch; + } + + @Override + protected boolean hasPrefixMatchSemantics() { + return this.prefixMatch; + } + @Override public void setValue(String value) { this.term = value; @@ -89,43 +108,39 @@ public class FuzzyItem extends TermItem { } @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (!super.equals(obj)) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - FuzzyItem other = (FuzzyItem) obj; - if (!this.term.equals(other.term)) return false; - if (this.maxEditDistance != other.maxEditDistance) return false; - if (this.prefixLength != other.prefixLength) return false; - return true; + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + FuzzyItem fuzzyItem = (FuzzyItem) o; + return maxEditDistance == fuzzyItem.maxEditDistance && + prefixLength == fuzzyItem.prefixLength && + prefixMatch == fuzzyItem.prefixMatch && + Objects.equals(term, fuzzyItem.term); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), term, maxEditDistance, prefixLength); + return Objects.hash(super.hashCode(), term, maxEditDistance, prefixLength, prefixMatch); } @Override protected void appendHeadingString(StringBuilder buffer) { buffer.append(getName()); - buffer.append("("); + buffer.append('('); buffer.append(this.term); - buffer.append(","); + buffer.append(','); buffer.append(this.maxEditDistance); - buffer.append(","); + buffer.append(','); buffer.append(this.prefixLength); - buffer.append(")"); - buffer.append(" "); + buffer.append(','); + buffer.append(this.prefixMatch); + buffer.append(") "); } @Override protected void encodeThis(ByteBuffer buffer) { + // Prefix matching is communicated via term header flags super.encodeThis(buffer); putString(getIndexedString(), buffer); IntegerCompressor.putCompressedPositiveNumber(this.maxEditDistance, buffer); diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java index f43b55424e6..099c546e3f0 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -161,6 +161,16 @@ public abstract class Item implements Cloneable { } /** + * Indicates that a query item that does not normally match with prefix semantics + * should do so for this particular query item instance. + * + * False by default; should be overridden by subclasses that want to signal this behavior. + */ + protected boolean hasPrefixMatchSemantics() { + return false; + } + + /** * Returns the item creator value. * * @deprecated use isFilter(boolean) @@ -286,6 +296,7 @@ public abstract class Item implements Cloneable { byte FLAGS_SPECIALTOKEN = 0x02; byte FLAGS_NOPOSITIONDATA = 0x04; byte FLAGS_ISFILTER = 0x08; + byte FLAGS_PREFIX_MATCH = 0x10; byte ret = 0; if (!isRanked()) { @@ -300,6 +311,9 @@ public abstract class Item implements Cloneable { if (isFilter()) { ret |= FLAGS_ISFILTER; } + if (hasPrefixMatchSemantics()) { + ret |= FLAGS_PREFIX_MATCH; + } return ret; } diff --git a/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java index 72184c5ea32..4d9111b2711 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java @@ -9,8 +9,6 @@ import com.yahoo.component.chain.Chain; import com.yahoo.component.chain.dependencies.After; import com.yahoo.component.chain.dependencies.Provides; import com.yahoo.component.provider.ComponentRegistry; -import com.yahoo.errorhandling.Results; -import com.yahoo.errorhandling.Results.Builder; import com.yahoo.processing.IllegalInputException; import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; @@ -19,12 +17,12 @@ import com.yahoo.search.Searcher; import com.yahoo.search.federation.selection.FederationTarget; import com.yahoo.search.federation.selection.TargetSelector; import com.yahoo.search.federation.sourceref.ModifyQueryAndResult; +import com.yahoo.search.federation.sourceref.ResolveResult; import com.yahoo.search.federation.sourceref.SearchChainInvocationSpec; import com.yahoo.search.federation.sourceref.SearchChainResolver; import com.yahoo.search.federation.sourceref.SingleTarget; import com.yahoo.search.federation.sourceref.SourceRefResolver; import com.yahoo.search.federation.sourceref.SourcesTarget; -import com.yahoo.search.federation.sourceref.UnresolvedSearchChainException; import com.yahoo.search.federation.sourceref.VirtualSourceResolver; import com.yahoo.search.query.Properties; import com.yahoo.search.result.ErrorMessage; @@ -53,6 +51,7 @@ import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.TimeUnit; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -206,14 +205,44 @@ public class FederationSearcher extends ForkingSearcher { setRequestTimeoutInMilliseconds(searchChain.requestTimeoutMillis()); } + private static List<String> extractErrors(List<ResolveResult> results) { + List<String> errors = List.of(); + for (ResolveResult result : results) { + if (result.errorMsg() != null) { + if (errors.isEmpty()) { + errors = new ArrayList<>(); + } + errors.add(result.errorMsg()); + } + } + return errors; + } + + private static List<SearchChainInvocationSpec> extractSpecs(List<ResolveResult> results) { + List<SearchChainInvocationSpec> errors = List.of(); + for (ResolveResult result : results) { + if (result.invocationSpec() != null) { + if (errors.isEmpty()) { + errors = List.of(result.invocationSpec()); + } else if (errors.size() == 1) { + errors = new ArrayList<>(errors); + errors.add(result.invocationSpec()); + } else { + errors.add(result.invocationSpec()); + } + } + } + return errors; + } + @Override public Result search(Query query, Execution execution) { Result mergedResults = execution.search(query); var targets = getTargets(query.getModel().getSources(), query.properties()); - warnIfUnresolvedSearchChains(targets.errors(), mergedResults.hits()); + warnIfUnresolvedSearchChains(extractErrors(targets), mergedResults.hits()); - var prunedTargets = pruneTargetsWithoutDocumentTypes(query.getModel().getRestrict(), targets.data()); + var prunedTargets = pruneTargetsWithoutDocumentTypes(query.getModel().getRestrict(), extractSpecs(targets)); var regularTargetHandlers = resolveSearchChains(prunedTargets, execution.searchChainRegistry()); query.errors().addAll(regularTargetHandlers.errors()); @@ -311,32 +340,19 @@ public class FederationSearcher extends ForkingSearcher { .forEach((k, v) -> outgoing.properties().set(k, v)); } - private ErrorMessage missingSearchChainsErrorMessage(List<UnresolvedSearchChainException> unresolvedSearchChainExceptions) { - String message = String.join(" ", getMessagesSet(unresolvedSearchChainExceptions)) + + private ErrorMessage missingSearchChainsErrorMessage(List<String> errors) { + String message = String.join(" ", new TreeSet<>(errors)) + " Valid source refs are " + String.join(", ", allSourceRefDescriptions()) +'.'; return ErrorMessage.createInvalidQueryParameter(message); } private List<String> allSourceRefDescriptions() { - List<String> descriptions = new ArrayList<>(); - - for (com.yahoo.search.federation.sourceref.Target target : searchChainResolver.allTopLevelTargets()) - descriptions.add(target.searchRefDescription()); - return descriptions; - } - - private static Set<String> getMessagesSet(List<UnresolvedSearchChainException> unresolvedSearchChainExceptions) { - Set<String> messages = new LinkedHashSet<>(); - for (UnresolvedSearchChainException exception : unresolvedSearchChainExceptions) { - messages.add(exception.getMessage()); - } - return messages; + return searchChainResolver.allTopLevelTargets().stream().map(com.yahoo.search.federation.sourceref.Target::searchRefDescription).toList(); } - private void warnIfUnresolvedSearchChains(List<UnresolvedSearchChainException> missingTargets, - HitGroup errorHitGroup) { - if (!missingTargets.isEmpty()) { - errorHitGroup.addError(missingSearchChainsErrorMessage(missingTargets)); + private void warnIfUnresolvedSearchChains(List<String> errorMessages, HitGroup errorHitGroup) { + if (!errorMessages.isEmpty()) { + errorHitGroup.addError(missingSearchChainsErrorMessage(errorMessages)); } } @@ -344,7 +360,7 @@ public class FederationSearcher extends ForkingSearcher { public Collection<CommentedSearchChain> getSearchChainsForwarded(SearchChainRegistry registry) { List<CommentedSearchChain> searchChains = new ArrayList<>(); - for (com.yahoo.search.federation.sourceref.Target target : searchChainResolver.allTopLevelTargets()) { + for (var target : searchChainResolver.allTopLevelTargets()) { if (target instanceof SourcesTarget) { searchChains.addAll(commentedSourceProviderSearchChains((SourcesTarget)target, registry)); } else if (target instanceof SingleTarget) { @@ -468,40 +484,32 @@ public class FederationSearcher extends ForkingSearcher { return orderer; } - private Results<SearchChainInvocationSpec, UnresolvedSearchChainException> getTargets(Set<String> sources, Properties properties) { + private List<ResolveResult> getTargets(Set<String> sources, Properties properties) { return sources.isEmpty() ? defaultSearchChains(properties): resolveSources(sources, properties); } - private Results<SearchChainInvocationSpec, UnresolvedSearchChainException> resolveSources(Set<String> sourcesInQuery, Properties properties) { - Results.Builder<SearchChainInvocationSpec, UnresolvedSearchChainException> result = new Builder<>(); + private List<ResolveResult> resolveSources(Set<String> sourcesInQuery, Properties properties) { + List<ResolveResult> result = new ArrayList<>(); Set<String> sources = virtualSourceResolver.resolve(sourcesInQuery); for (String source : sources) { - try { - result.addAllData(sourceRefResolver.resolve(asSourceSpec(source), properties)); - } catch (UnresolvedSearchChainException e) { - result.addError(e); - } + result.addAll(sourceRefResolver.resolve(asSourceSpec(source), properties)); } - return result.build(); + return List.copyOf(result); } - public Results<SearchChainInvocationSpec, UnresolvedSearchChainException> defaultSearchChains(Properties sourceToProviderMap) { - Results.Builder<SearchChainInvocationSpec, UnresolvedSearchChainException> result = new Builder<>(); + public List<ResolveResult> defaultSearchChains(Properties sourceToProviderMap) { + List<ResolveResult> result = new ArrayList<>(); - for (com.yahoo.search.federation.sourceref.Target target : searchChainResolver.defaultTargets()) { - try { - result.addData(target.responsibleSearchChain(sourceToProviderMap)); - } catch (UnresolvedSearchChainException e) { - result.addError(e); - } + for (var target : searchChainResolver.defaultTargets()) { + result.add(target.responsibleSearchChain(sourceToProviderMap)); } - return result.build(); + return List.copyOf(result); } diff --git a/container-search/src/main/java/com/yahoo/search/federation/Results.java b/container-search/src/main/java/com/yahoo/search/federation/Results.java new file mode 100644 index 00000000000..7598a14f759 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/Results.java @@ -0,0 +1,45 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * @author Tony Vaagenes + */ +public class Results<DATA, ERROR> { + + private final List<DATA> data; + private final List<ERROR> errors; + + private Results(List<DATA> data, List<ERROR> errors) { + this.data = List.copyOf(data); + this.errors = List.copyOf(errors); + } + + public List<DATA> data() { + return data; + } + + public List<ERROR> errors() { + return errors; + } + + public static class Builder<DATA, ERROR> { + private final List<DATA> data = new ArrayList<>(); + private final List<ERROR> errors = new ArrayList<>(); + + public void addData(DATA d) { + data.add(d); + } + public void addError(ERROR e) { + errors.add(e); + } + + public Results<DATA, ERROR> build() { + return new Results<>(data, errors); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/ResolveResult.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/ResolveResult.java new file mode 100644 index 00000000000..d9681140ae9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/ResolveResult.java @@ -0,0 +1,14 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.sourceref; + +/** + * @author baldersheim + */ +public record ResolveResult(SearchChainInvocationSpec invocationSpec, String errorMsg) { + ResolveResult(SearchChainInvocationSpec invocationSpec) { + this(invocationSpec, null); + } + ResolveResult(String errorMsg) { + this(null, errorMsg); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SearchChainResolver.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SearchChainResolver.java index 7dc65c819e4..9e45b6576a6 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SearchChainResolver.java +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SearchChainResolver.java @@ -46,16 +46,6 @@ public class SearchChainResolver { public static class Builder { - public interface InvocationSpecFactory { - SearchChainInvocationSpec create(ComponentId searchChainId, FederationOptions federationOptions, List<String> schemas); - } - - private class DefaultInvocationSpecFactory implements InvocationSpecFactory { - public SearchChainInvocationSpec create(ComponentId searchChainId, FederationOptions federationOptions, List<String> schemas) { - return new SearchChainInvocationSpec(searchChainId, federationOptions, schemas); - } - } - private final SortedSet<Target> defaultTargets = new TreeSet<>(); private final ComponentRegistry<Target> targets = new ComponentRegistry<>() { @@ -137,19 +127,13 @@ public class SearchChainResolver { this.defaultTargets = Collections.unmodifiableSortedSet(defaultTargets); } - public SearchChainInvocationSpec resolve(ComponentSpecification sourceRef, Properties sourceToProviderMap) - throws UnresolvedSearchChainException { + public ResolveResult resolve(ComponentSpecification sourceRef, Properties sourceToProviderMap) { - Target target = resolveTarget(sourceRef); - return target.responsibleSearchChain(sourceToProviderMap); - } - - private Target resolveTarget(ComponentSpecification sourceRef) throws UnresolvedSearchChainException { Target target = targets.getComponent(sourceRef); if (target == null) { - throw UnresolvedSourceRefException.createForMissingSourceRef(sourceRef); + return new ResolveResult(SourceRefResolver.createForMissingSourceRef(sourceRef)); } - return target; + return target.responsibleSearchChain(sourceToProviderMap); } public SortedSet<Target> allTopLevelTargets() { diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SingleTarget.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SingleTarget.java index 608566552cd..3de67908217 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SingleTarget.java +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SingleTarget.java @@ -17,8 +17,8 @@ public class SingleTarget extends Target { } @Override - public SearchChainInvocationSpec responsibleSearchChain(Properties queryProperties) { - return searchChainInvocationSpec; + public ResolveResult responsibleSearchChain(Properties queryProperties) { + return new ResolveResult(searchChainInvocationSpec); } @Override diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourceRefResolver.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourceRefResolver.java index 2e7849dd85a..b5c40db01f8 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourceRefResolver.java +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourceRefResolver.java @@ -4,10 +4,9 @@ package com.yahoo.search.federation.sourceref; import com.yahoo.component.ComponentSpecification; import com.yahoo.processing.request.Properties; -import java.util.LinkedHashSet; +import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Set; /** * Maps a source reference to search chain invocation specs. @@ -24,21 +23,18 @@ public class SourceRefResolver { this.schema2Clusters = schema2Clusters; } - public Set<SearchChainInvocationSpec> resolve(ComponentSpecification sourceRef, - Properties sourceToProviderMap) throws UnresolvedSearchChainException { - try { - return Set.of(searchChainResolver.resolve(sourceRef, sourceToProviderMap)); - } catch (UnresolvedSourceRefException e) { + public List<ResolveResult> resolve(ComponentSpecification sourceRef, Properties sourceToProviderMap) { + ResolveResult searchChainResolveResult = searchChainResolver.resolve(sourceRef, sourceToProviderMap); + if (searchChainResolveResult.invocationSpec() == null) { return resolveClustersWithDocument(sourceRef, sourceToProviderMap); } + return List.of(searchChainResolveResult); } - private Set<SearchChainInvocationSpec> resolveClustersWithDocument(ComponentSpecification sourceRef, - Properties sourceToProviderMap) - throws UnresolvedSearchChainException { + private List<ResolveResult> resolveClustersWithDocument(ComponentSpecification sourceRef, Properties sourceToProviderMap) { if (hasOnlyName(sourceRef)) { - Set<SearchChainInvocationSpec> clusterSearchChains = new LinkedHashSet<>(); + List<ResolveResult> clusterSearchChains = new ArrayList<>(); List<String> clusters = schema2Clusters.getOrDefault(sourceRef.getName(), List.of()); for (String cluster : clusters) { @@ -48,21 +44,22 @@ public class SourceRefResolver { if ( ! clusterSearchChains.isEmpty()) return clusterSearchChains; } - throw UnresolvedSourceRefException.createForMissingSourceRef(sourceRef); + return List.of(new ResolveResult(createForMissingSourceRef(sourceRef))); } - private SearchChainInvocationSpec resolveClusterSearchChain(String cluster, - ComponentSpecification sourceRef, - Properties sourceToProviderMap) - throws UnresolvedSearchChainException { - try { - return searchChainResolver.resolve(new ComponentSpecification(cluster), sourceToProviderMap); - } - catch (UnresolvedSearchChainException e) { - throw new UnresolvedSearchChainException("Failed to resolve cluster search chain '" + cluster + - "' when using source ref '" + sourceRef + - "' as a document name."); + static String createForMissingSourceRef(ComponentSpecification source) { + return "Could not resolve source ref '" + source + "'."; + } + + private ResolveResult resolveClusterSearchChain(String cluster, + ComponentSpecification sourceRef, + Properties sourceToProviderMap) { + var resolveResult = searchChainResolver.resolve(new ComponentSpecification(cluster), sourceToProviderMap); + if (resolveResult.invocationSpec() == null) { + return new ResolveResult("Failed to resolve cluster search chain '" + cluster + + "' when using source ref '" + sourceRef + "' as a document name."); } + return resolveResult; } private boolean hasOnlyName(ComponentSpecification sourceSpec) { diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourcesTarget.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourcesTarget.java index b6d99758c7b..a3c0328290d 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourcesTarget.java +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourcesTarget.java @@ -16,7 +16,7 @@ import java.util.TreeSet; public class SourcesTarget extends Target { - private ComponentRegistry<ComponentAdaptor<SearchChainInvocationSpec>> providerSources = new ComponentRegistry<>() {}; + private final ComponentRegistry<ComponentAdaptor<SearchChainInvocationSpec>> providerSources = new ComponentRegistry<>() {}; private SearchChainInvocationSpec defaultProviderSource; @@ -25,10 +25,10 @@ public class SourcesTarget extends Target { } @Override - public SearchChainInvocationSpec responsibleSearchChain(Properties queryProperties) throws UnresolvedSearchChainException { + public ResolveResult responsibleSearchChain(Properties queryProperties) { ComponentSpecification providerSpecification = providerSpecificationForSource(queryProperties); if (providerSpecification == null) { - return defaultProviderSource; + return new ResolveResult(defaultProviderSource); } else { return lookupProviderSource(providerSpecification); } @@ -36,11 +36,7 @@ public class SourcesTarget extends Target { @Override public String searchRefDescription() { - StringBuilder builder = new StringBuilder(sourceId().stringValue()); - builder.append("[provider = "). - append(Joiner.on(", ").join(allProviderIdsStringValue())). - append("]"); - return builder.toString(); + return sourceId().stringValue() + "[provider = " + Joiner.on(", ").join(allProviderIdsStringValue()) + "]"; } private SortedSet<String> allProviderIdsStringValue() { @@ -51,14 +47,13 @@ public class SourcesTarget extends Target { return result; } - private SearchChainInvocationSpec lookupProviderSource(ComponentSpecification providerSpecification) - throws UnresolvedSearchChainException { + private ResolveResult lookupProviderSource(ComponentSpecification providerSpecification) { ComponentAdaptor<SearchChainInvocationSpec> providerSource = providerSources.getComponent(providerSpecification); if (providerSource == null) - throw UnresolvedProviderException.createForMissingProvider(sourceId(), providerSpecification); + return new ResolveResult("No provider '" + sourceId() + "' for source '" + providerSpecification + "'."); - return providerSource.model; + return new ResolveResult(providerSource.model); } public void freeze() { diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/Target.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/Target.java index 38baf084d97..d35f7f7b181 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/Target.java +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/Target.java @@ -23,9 +23,8 @@ public abstract class Target extends AbstractComponent { this(localId, false); } - public abstract SearchChainInvocationSpec responsibleSearchChain(Properties queryProperties) throws UnresolvedSearchChainException; + public abstract ResolveResult responsibleSearchChain(Properties queryProperties); public abstract String searchRefDescription(); abstract void freeze(); - } diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedProviderException.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedProviderException.java deleted file mode 100644 index aa21ad3b369..00000000000 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedProviderException.java +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.search.federation.sourceref; - -import com.yahoo.component.ComponentId; -import com.yahoo.component.ComponentSpecification; - -/** - * @author Tony Vaagenes - */ -@SuppressWarnings("serial") -class UnresolvedProviderException extends UnresolvedSearchChainException { - UnresolvedProviderException(String msg) { - super(msg); - } - - static UnresolvedSearchChainException createForMissingProvider(ComponentId source, - ComponentSpecification provider) { - return new UnresolvedProviderException("No provider '" + provider + "' for source '" + source + "'."); - } -} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSearchChainException.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSearchChainException.java deleted file mode 100644 index 0c8562e6032..00000000000 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSearchChainException.java +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.search.federation.sourceref; - -/** - * Thrown if a search chain can not be resolved from one or more ids. - * @author Tony Vaagenes - */ -public class UnresolvedSearchChainException extends Exception { - public UnresolvedSearchChainException(String msg) { - super(msg); - } -} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSourceRefException.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSourceRefException.java deleted file mode 100644 index fa2c1da13f0..00000000000 --- a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSourceRefException.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.search.federation.sourceref; - -import com.yahoo.component.ComponentSpecification; - -/** - * @author Tony Vaagenes - */ -class UnresolvedSourceRefException extends UnresolvedSearchChainException { - UnresolvedSourceRefException(String msg) { - super(msg); - } - - - static UnresolvedSearchChainException createForMissingSourceRef(ComponentSpecification source) { - return new UnresolvedSourceRefException("Could not resolve source ref '" + source + "'."); - } -} diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java index c897afe144c..c90612425fa 100644 --- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java +++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java @@ -1150,8 +1150,9 @@ public class SelectParser implements Parser { Integer maxEditDistance = getIntegerAnnotation(MAX_EDIT_DISTANCE, annotations, FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE); Integer prefixLength = getIntegerAnnotation(PREFIX_LENGTH, annotations, FuzzyItem.DEFAULT_PREFIX_LENGTH); + boolean prefixMatch = getBoolAnnotation(PREFIX, annotations, Boolean.FALSE); - FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength); + FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength, prefixMatch); return leafStyleSettings(getAnnotations(value), fuzzy); } diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java index 9ea35339f8d..97220725fec 100644 --- a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java @@ -43,7 +43,8 @@ public class LocalProviderSpec { com.yahoo.search.searchers.ValidateFuzzySearcher.class, com.yahoo.search.yql.FieldFiller.class, com.yahoo.search.searchers.InputCheckingSearcher.class, - com.yahoo.search.searchers.ContainerLatencySearcher.class); + com.yahoo.search.searchers.ContainerLatencySearcher.class, + com.yahoo.search.significance.SignificanceSearcher.class); public final String clusterName; diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java new file mode 100644 index 00000000000..0a42bf8a259 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -0,0 +1,77 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.significance; + +import com.yahoo.component.annotation.Inject; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.language.Language; +import com.yahoo.language.significance.SignificanceModel; +import com.yahoo.language.significance.SignificanceModelRegistry; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +import java.util.Optional; + +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +/** + * Sets significance values on word items in the query tree. + * + * @author MariusArhaug + */ + +@Provides(SignificanceSearcher.SIGNIFICANCE) +@Before(STEMMING) +public class SignificanceSearcher extends Searcher { + + public final static String SIGNIFICANCE = "Significance"; + private final SignificanceModelRegistry significanceModelRegistry; + + + @Inject + public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry) { + this.significanceModelRegistry = significanceModelRegistry; + } + + @Override + public Result search(Query query, Execution execution) { + Language language = query.getModel().getParsingLanguage(); + Optional<SignificanceModel> model = significanceModelRegistry.getModel(language); + + if (model.isEmpty()) return execution.search(query); + + setIDF(query.getModel().getQueryTree().getRoot(), model.get()); + + return execution.search(query); + } + + private void setIDF(Item root, SignificanceModel significanceModel) { + if (root == null || root instanceof NullItem) return; + + if (root instanceof WordItem) { + + var documentFrequency = significanceModel.documentFrequency(((WordItem) root).getWord()); + long N = documentFrequency.corpusSize(); + long nq_i = documentFrequency.frequency(); + double idf = calculateIDF(N, nq_i); + + ((WordItem) root).setSignificance(idf); + } else if (root instanceof CompositeItem) { + for (int i = 0; i < ((CompositeItem) root).getItemCount(); i++) { + setIDF(((CompositeItem) root).getItem(i), significanceModel); + } + } + } + + public static double calculateIDF(long N, long nq_i) { + return Math.log(1 + (N - nq_i + 0.5) / (nq_i + 0.5)); + } +} + + diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java index 634163bf0c2..a354006aa9b 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -551,24 +551,31 @@ public class VespaSerializer { static String fuzzyAnnotations(FuzzyItem fuzzyItem) { boolean isMaxEditDistanceSet = fuzzyItem.getMaxEditDistance() != FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE; boolean isPrefixLengthSet = fuzzyItem.getPrefixLength() != FuzzyItem.DEFAULT_PREFIX_LENGTH; - boolean anyAnnotationSet = isMaxEditDistanceSet || isPrefixLengthSet; + boolean isPrefixMatch = fuzzyItem.isPrefixMatch(); + boolean anyAnnotationSet = isMaxEditDistanceSet || isPrefixLengthSet || isPrefixMatch; - StringBuilder builder = new StringBuilder(); - if (anyAnnotationSet) { - builder.append("{"); + if (!anyAnnotationSet) { + return ""; } + + StringBuilder builder = new StringBuilder(); + builder.append("{"); if (isMaxEditDistanceSet) { builder.append(MAX_EDIT_DISTANCE + ":").append(fuzzyItem.getMaxEditDistance()); - } - if (isMaxEditDistanceSet && isPrefixLengthSet) { - builder.append(","); + if (isPrefixLengthSet || isPrefixMatch) { + builder.append(","); + } } if (isPrefixLengthSet) { builder.append(PREFIX_LENGTH + ":").append(fuzzyItem.getPrefixLength()); + if (isPrefixMatch) { + builder.append(","); + } } - if (anyAnnotationSet) { - builder.append("}"); + if (isPrefixMatch) { + builder.append(PREFIX).append(':').append(fuzzyItem.isPrefixMatch()); } + builder.append("}"); return builder.toString(); } } diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index e66cac5766c..fb4ec5ba872 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -1385,7 +1385,14 @@ public class YqlParser implements Parser { FuzzyItem.DEFAULT_PREFIX_LENGTH, PREFIX_LENGTH_DESCRIPTION); - FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength); + boolean prefixMatch = getAnnotation( + ast, + PREFIX, + Boolean.class, + Boolean.FALSE, + "setting for whether to use prefix match of input data"); + + FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength, prefixMatch); return leafStyleSettings(ast, fuzzy); } diff --git a/container-search/src/test/java/com/yahoo/search/federation/sourceref/SearchChainResolverTestCase.java b/container-search/src/test/java/com/yahoo/search/federation/sourceref/SearchChainResolverTestCase.java index d9046075f38..e5bbb48e807 100644 --- a/container-search/src/test/java/com/yahoo/search/federation/sourceref/SearchChainResolverTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/federation/sourceref/SearchChainResolverTestCase.java @@ -13,8 +13,8 @@ import java.util.Iterator; import java.util.SortedSet; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.fail; /** * @author Tony Vaagenes @@ -59,37 +59,33 @@ public class SearchChainResolverTestCase { @Test void require_error_message_for_invalid_source() { - try { - resolve("no-such-source"); - fail("Expected exception."); - } catch (UnresolvedSearchChainException e) { - assertEquals("Could not resolve source ref 'no-such-source'.", e.getMessage()); - } + var result = resolve("no-such-source"); + assertEquals("Could not resolve source ref 'no-such-source'.", result.errorMsg()); } @Test - void lookup_search_chain() throws Exception { - SearchChainInvocationSpec res = resolve(searchChainId.getName()); + void lookup_search_chain() { + SearchChainInvocationSpec res = resolve(searchChainId.getName()).invocationSpec(); assertEquals(searchChainId, res.searchChainId); } //TODO: TVT: @Test() - public void lookup_provider() throws Exception { - SearchChainInvocationSpec res = resolve(providerId.getName()); + public void lookup_provider() { + SearchChainInvocationSpec res = resolve(providerId.getName()).invocationSpec(); assertEquals(providerId, res.provider); assertNull(res.source); assertEquals(providerId, res.searchChainId); } @Test - void lookup_source() throws Exception { - SearchChainInvocationSpec res = resolve(sourceId.getName()); + void lookup_source() { + SearchChainInvocationSpec res = resolve(sourceId.getName()).invocationSpec(); assertIsSourceInProvider(res); } @Test - void lookup_source_search_chain_directly() throws Exception { - SearchChainInvocationSpec res = resolve(sourceChainInProviderId.stringValue()); + void lookup_source_search_chain_directly() { + SearchChainInvocationSpec res = resolve(sourceChainInProviderId.stringValue()).invocationSpec(); assertIsSourceInProvider(res); } @@ -100,8 +96,8 @@ public class SearchChainResolverTestCase { } @Test - void lookup_source_for_provider2() throws Exception { - SearchChainInvocationSpec res = resolve(sourceId.getName(), provider2Id.getName()); + void lookup_source_for_provider2() { + SearchChainInvocationSpec res = resolve(sourceId.getName(), provider2Id.getName()).invocationSpec(); assertEquals(provider2Id, res.provider); assertEquals(sourceId, res.source); assertEquals(sourceChainInProvider2Id, res.searchChainId); @@ -126,22 +122,24 @@ public class SearchChainResolverTestCase { return new PropertyMap(); } - private SearchChainInvocationSpec resolve(String sourceSpecification) throws UnresolvedSearchChainException { + private ResolveResult resolve(String sourceSpecification) { return resolve(sourceSpecification, emptySourceToProviderMap()); } - private SearchChainInvocationSpec resolve(String sourceSpecification, String providerSpecification) - throws UnresolvedSearchChainException { + private ResolveResult resolve(String sourceSpecification, String providerSpecification) { Properties sourceToProviderMap = emptySourceToProviderMap(); sourceToProviderMap.set("source." + sourceSpecification + ".provider", providerSpecification); return resolve(sourceSpecification, sourceToProviderMap); } - private SearchChainInvocationSpec resolve(String sourceSpecification, Properties sourceToProviderMap) - throws UnresolvedSearchChainException { - SearchChainInvocationSpec res = searchChainResolver.resolve( + private ResolveResult resolve(String sourceSpecification, Properties sourceToProviderMap) { + ResolveResult res = searchChainResolver.resolve( ComponentSpecification.fromString(sourceSpecification), sourceToProviderMap); - assertEquals(federationOptions, res.federationOptions); + if (res.invocationSpec() != null) { + assertEquals(federationOptions, res.invocationSpec().federationOptions); + } else { + assertNotNull(res.errorMsg()); + } return res; } diff --git a/container-search/src/test/java/com/yahoo/search/federation/sourceref/SourceRefResolverTestCase.java b/container-search/src/test/java/com/yahoo/search/federation/sourceref/SourceRefResolverTestCase.java index b32135afc94..95262937c01 100644 --- a/container-search/src/test/java/com/yahoo/search/federation/sourceref/SourceRefResolverTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/federation/sourceref/SourceRefResolverTestCase.java @@ -3,21 +3,16 @@ package com.yahoo.search.federation.sourceref; import com.yahoo.component.ComponentId; import com.yahoo.component.ComponentSpecification; -import com.yahoo.prelude.IndexFacts; -import com.yahoo.prelude.IndexModel; import com.yahoo.search.searchchain.model.federation.FederationOptions; import org.junit.jupiter.api.Test; -import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.TreeMap; import static com.yahoo.search.federation.sourceref.SearchChainResolverTestCase.emptySourceToProviderMap; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; /** * Test for SourceRefResolver. @@ -47,49 +42,38 @@ public class SourceRefResolverTestCase { @Test void lookup_search_chain() throws Exception { - Set<SearchChainInvocationSpec> searchChains = resolve(cluster1); + List<ResolveResult> searchChains = resolve(cluster1); assertEquals(1, searchChains.size()); assertTrue(searchChainIds(searchChains).contains(cluster1)); } @Test void lookup_search_chains_for_document1() throws Exception { - Set<SearchChainInvocationSpec> searchChains = resolve("document1"); + List<ResolveResult> searchChains = resolve("document1"); assertEquals(2, searchChains.size()); assertTrue(searchChainIds(searchChains).containsAll(List.of(cluster1, cluster2))); } @Test void error_when_document_gives_cluster_without_matching_search_chain() { - try { - resolve("document3"); - fail("Expected exception"); - } catch (UnresolvedSearchChainException e) { - assertEquals("Failed to resolve cluster search chain 'cluster3' " + - "when using source ref 'document3' as a document name.", - e.getMessage()); - } + List<ResolveResult> result = resolve("document3"); + + assertEquals("Failed to resolve cluster search chain 'cluster3' " + + "when using source ref 'document3' as a document name.", + result.get(0).errorMsg()); } @Test void error_when_no_document_or_search_chain() { - try { - resolve("document4"); - fail("Expected exception"); - } catch (UnresolvedSearchChainException e) { - assertEquals("Could not resolve source ref 'document4'.", e.getMessage()); - } + List<ResolveResult> results = resolve("document4"); + assertEquals("Could not resolve source ref 'document4'.", results.get(0).errorMsg()); } - private List<String> searchChainIds(Set<SearchChainInvocationSpec> searchChains) { - List<String> names = new ArrayList<>(); - for (SearchChainInvocationSpec searchChain : searchChains) { - names.add(searchChain.searchChainId.stringValue()); - } - return names; + private List<String> searchChainIds(Collection<ResolveResult> searchChains) { + return searchChains.stream().map(r -> r.invocationSpec().searchChainId.stringValue()).toList(); } - private Set<SearchChainInvocationSpec> resolve(String documentName) throws UnresolvedSearchChainException { + private List<ResolveResult> resolve(String documentName) { return sourceRefResolver.resolve(ComponentSpecification.fromString(documentName), emptySourceToProviderMap()); } diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java index c4b8c9f2044..027152bfd69 100644 --- a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java @@ -55,14 +55,13 @@ public class ValidateFuzzySearcherTestCase { searcher = new ValidateFuzzySearcher(); } - private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength) { - return "select * from sources * where " + attribute + - " contains ({maxEditDistance:" + maxEditDistance + ", prefixLength:" + prefixLength +"}" + - "fuzzy(\"" + query + "\"))"; + private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength, boolean prefixMatch) { + return "select * from sources * where %s contains ({maxEditDistance:%d,prefixLength:%d,prefix:%b}fuzzy(\"%s\"))" + .formatted(attribute, maxEditDistance, prefixLength, prefixMatch, query); } private String makeQuery(String attribute, String query) { - return makeQuery(attribute, query, 2, 0); + return makeQuery(attribute, query, 2, 0, false); } @@ -76,7 +75,7 @@ public class ValidateFuzzySearcherTestCase { if (validAttributes.contains(attribute)) { assertNull(r.hits().getError()); } else { - assertErrMsg("FUZZY(fuzzy,2,0) " + attribute + ":fuzzy field is not a string attribute", r); + assertErrMsg("FUZZY(fuzzy,2,0,false) " + attribute + ":fuzzy field is not a string attribute", r); } } } @@ -85,28 +84,28 @@ public class ValidateFuzzySearcherTestCase { void testInvalidEmptyStringQuery() { String q = makeQuery("string_single", ""); Result r = doSearch(searcher, q); - assertErrMsg("FUZZY(,2,0) string_single: fuzzy query must be non-empty", r); + assertErrMsg("FUZZY(,2,0,false) string_single: fuzzy query must be non-empty", r); } @Test void testInvalidQueryWrongMaxEditDistance() { - String q = makeQuery("string_single", "fuzzy", -1, 0); + String q = makeQuery("string_single", "fuzzy", -1, 0, false); Result r = doSearch(searcher, q); - assertErrMsg("FUZZY(fuzzy,-1,0) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r); + assertErrMsg("FUZZY(fuzzy,-1,0,false) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r); } @Test void testInvalidQueryWrongPrefixLength() { - String q = makeQuery("string_single", "fuzzy", 2, -1); + String q = makeQuery("string_single", "fuzzy", 2, -1, true); Result r = doSearch(searcher, q); - assertErrMsg("FUZZY(fuzzy,2,-1) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r); + assertErrMsg("FUZZY(fuzzy,2,-1,true) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r); } @Test void testInvalidQueryWrongAttributeName() { String q = makeQuery("wrong_name", "fuzzy"); Result r = doSearch(searcher, q); - assertErrMsg("FUZZY(fuzzy,2,0) wrong_name:fuzzy field is not a string attribute", r); + assertErrMsg("FUZZY(fuzzy,2,0,false) wrong_name:fuzzy field is not a string attribute", r); } private static void assertErrMsg(String message, Result r) { diff --git a/container-search/src/test/java/com/yahoo/search/significance/model/en.json b/container-search/src/test/java/com/yahoo/search/significance/model/en.json new file mode 100644 index 00000000000..50bae5e3451 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/significance/model/en.json @@ -0,0 +1,14 @@ +{ + "version" : "1.0", + "id" : "test::1", + "description" : "desc", + "corpus-size" : 10, + "language" : "en", + "word-count" : 4, + "frequencies" : { + "usa" : 2, + "hello": 3, + "world": 5, + "test": 2 + } +} diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java new file mode 100644 index 00000000000..890db3abb51 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java @@ -0,0 +1,153 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.significance.test; + +import com.yahoo.component.chain.Chain; +import com.yahoo.language.Language; +import com.yahoo.language.significance.SignificanceModel; +import com.yahoo.language.significance.SignificanceModelRegistry; +import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.significance.SignificanceSearcher; +import org.junit.jupiter.api.Test; + +import java.nio.file.Path; +import java.util.HashMap; + + +import static com.yahoo.test.JunitCompat.assertEquals; + +/** + * Tests significance term in the search chain. + * + * @author MariusArhaug + */ +public class SignificanceSearcherTest { + SignificanceModelRegistry significanceModelRegistry; + SignificanceSearcher searcher; + + public SignificanceSearcherTest() { + HashMap<Language, Path> map = new HashMap<>(); + map.put(Language.ENGLISH, Path.of("src/test/java/com/yahoo/search/significance/model/en.json")); + + significanceModelRegistry = new DefaultSignificanceModelRegistry(map); + searcher = new SignificanceSearcher(significanceModelRegistry); + } + + private Execution createExecution(SignificanceSearcher searcher) { + return new Execution(new Chain<>(searcher), Execution.Context.createContextStub()); + } + + private Execution createExecution() { + return new Execution(new Chain<>(), Execution.Context.createContextStub()); + } + + @Test + void testSignificanceValueOnSimpleQuery() { + + Query q = new Query(); + AndItem root = new AndItem(); + WordItem tmp; + tmp = new WordItem("Hello", true); + root.addItem(tmp); + tmp = new WordItem("world", true); + root.addItem(tmp); + + q.getModel().getQueryTree().setRoot(root); + + SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get(); + var helloFrequency = model.documentFrequency("Hello"); + var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency()); + + var worldFrequency = model.documentFrequency("world"); + var worldSignificanceValue = SignificanceSearcher.calculateIDF(worldFrequency.corpusSize(), worldFrequency.frequency()); + + Result r = createExecution(searcher).search(q); + + root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); + WordItem w0 = (WordItem) root.getItem(0); + WordItem w1 = (WordItem) root.getItem(1); + + assertEquals(helloSignificanceValue, w0.getSignificance()); + assertEquals(worldSignificanceValue, w1.getSignificance()); + + } + + @Test + void testSignificanceValueOnRecursiveQuery() { + Query q = new Query(); + AndItem root = new AndItem(); + WordItem child1 = new WordItem("hello", true); + + AndItem child2 = new AndItem(); + WordItem child2_1 = new WordItem("test", true); + + AndItem child3 = new AndItem(); + AndItem child3_1 = new AndItem(); + WordItem child3_1_1 = new WordItem("usa", true); + + root.addItem(child1); + root.addItem(child2); + root.addItem(child3); + + child2.addItem(child2_1); + child3.addItem(child3_1); + child3_1.addItem(child3_1_1); + + q.getModel().getQueryTree().setRoot(root); + + SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get(); + var helloFrequency = model.documentFrequency("hello"); + var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency()); + + var testFrequency = model.documentFrequency("test"); + var testSignificanceValue = SignificanceSearcher.calculateIDF(testFrequency.corpusSize(), testFrequency.frequency()); + + + + Result r = createExecution(searcher).search(q); + + root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); + WordItem w0 = (WordItem) root.getItem(0); + WordItem w1 = (WordItem) ((AndItem) root.getItem(1)).getItem(0); + WordItem w3 = (WordItem) ((AndItem) ((AndItem) root.getItem(2)).getItem(0)).getItem(0); + + assertEquals(helloSignificanceValue, w0.getSignificance()); + assertEquals(testSignificanceValue, w1.getSignificance()); + assertEquals(SignificanceSearcher.calculateIDF(10, 2), w3.getSignificance()); + + } + + @Test + void testSignificanceValueOnEmptyQuery() { + Query q = new Query(); + q.getModel().setLanguage(Language.NORWEGIAN_BOKMAL); + AndItem root = new AndItem(); + WordItem tmp; + tmp = new WordItem("Hei", true); + root.addItem(tmp); + tmp = new WordItem("Verden", true); + root.addItem(tmp); + + + q.getModel().getQueryTree().setRoot(root); + Result r = createExecution(searcher).search(q); + root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); + + WordItem w0 = (WordItem) root.getItem(0); + WordItem w1 = (WordItem) root.getItem(1); + + Result r0 = createExecution().search(q); + root = (AndItem) r0.getQuery().getModel().getQueryTree().getRoot(); + + WordItem w0_0 = (WordItem) root.getItem(0); + WordItem w0_1 = (WordItem) root.getItem(1); + + assertEquals(w0_0.getSignificance(), w0.getSignificance()); + assertEquals(w0_1.getSignificance(), w1.getSignificance()); + + } +} diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java index 20ca81234a6..b5e2839c4c0 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java @@ -464,7 +464,12 @@ public class VespaSerializerTestCase { @Test void testFuzzyAnnotations() { + parseAndConfirm("foo contains ({maxEditDistance:3}fuzzy(\"a\"))"); parseAndConfirm("foo contains ({maxEditDistance:3,prefixLength:5}fuzzy(\"a\"))"); + parseAndConfirm("foo contains ({maxEditDistance:3,prefixLength:5,prefix:true}fuzzy(\"a\"))"); + parseAndConfirm("foo contains ({prefixLength:5,prefix:true}fuzzy(\"a\"))"); + parseAndConfirm("foo contains ({maxEditDistance:3,prefix:true}fuzzy(\"a\"))"); + parseAndConfirm("foo contains ({prefix:true}fuzzy(\"a\"))"); } @Test diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index 29a651aabf4..91f5984481a 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -437,23 +437,27 @@ public class YqlParserTestCase { QueryTree x = parse("select foo from bar where baz contains fuzzy(\"a b\")"); Item root = x.getRoot(); assertSame(FuzzyItem.class, root.getClass()); - assertEquals("baz", ((FuzzyItem) root).getIndexName()); - assertEquals("a b", ((FuzzyItem) root).stringValue()); - assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, ((FuzzyItem) root).getMaxEditDistance()); - assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, ((FuzzyItem) root).getPrefixLength()); + var fuzzy = (FuzzyItem) root; + assertEquals("baz", fuzzy.getIndexName()); + assertEquals("a b", fuzzy.stringValue()); + assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, fuzzy.getMaxEditDistance()); + assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, fuzzy.getPrefixLength()); + assertFalse(fuzzy.isPrefixMatch()); } @Test void testFuzzyAnnotations() { QueryTree x = parse( - "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10}fuzzy(\"a b\"))" + "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10, prefix: true}fuzzy(\"a b\"))" ); Item root = x.getRoot(); assertSame(FuzzyItem.class, root.getClass()); - assertEquals("baz", ((FuzzyItem) root).getIndexName()); - assertEquals("a b", ((FuzzyItem) root).stringValue()); - assertEquals(3, ((FuzzyItem) root).getMaxEditDistance()); - assertEquals(10, ((FuzzyItem) root).getPrefixLength()); + var fuzzy = (FuzzyItem) root; + assertEquals("baz", fuzzy.getIndexName()); + assertEquals("a b", fuzzy.stringValue()); + assertEquals(3, fuzzy.getMaxEditDistance()); + assertEquals(10, fuzzy.getPrefixLength()); + assertTrue(fuzzy.isPrefixMatch()); } @Test diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java index f4571f04a5d..f863816dab2 100644 --- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java +++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java @@ -671,8 +671,39 @@ public class SelectTestCase { QueryTree x = parseWhere("{ \"contains\": [\"description\", { \"fuzzy\": [\"a b\"] }] }"); Item root = x.getRoot(); assertSame(FuzzyItem.class, root.getClass()); - assertEquals("description", ((FuzzyItem) root).getIndexName()); - assertEquals("a b", ((FuzzyItem) root).stringValue()); + var fuzzy = (FuzzyItem) root; + assertEquals("description", fuzzy.getIndexName()); + assertEquals("a b", fuzzy.stringValue()); + assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, fuzzy.getMaxEditDistance()); + assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, fuzzy.getPrefixLength()); + assertFalse(fuzzy.isPrefixMatch()); + } + + @Test + void fuzzy_with_annotations() { + var where = """ + { + "contains": ["description", { + "fuzzy": { + "children": ["a b"], + "attributes": { + "maxEditDistance": 3, + "prefixLength": 10, + "prefix": true + } + } + }] + } + """; + QueryTree x = parseWhere(where); + Item root = x.getRoot(); + assertSame(FuzzyItem.class, root.getClass()); + var fuzzy = (FuzzyItem) root; + assertEquals("description", fuzzy.getIndexName()); + assertEquals("a b", fuzzy.stringValue()); + assertEquals(3, fuzzy.getMaxEditDistance()); + assertEquals(10, fuzzy.getPrefixLength()); + assertTrue(fuzzy.isPrefixMatch()); } //------------------------------------------------------------------- grouping tests |