// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude; import com.yahoo.language.process.StemMode; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Set; /** * Information about configured settings of a field or field collection (an actual index or not) in a search definition. * There are two types of settings: * * addCommand sets both types. * * @author Steinar Knutsen * @author bratseth */ public class Index { /** The null index - don't use this for name lookups */ public static final Index nullIndex = new Index("(null)"); private final String name; private final List aliases = new ArrayList<>(); // The state resulting from adding commands to this (using addCommand) private boolean tensor = false; private boolean uriIndex = false; private boolean hostIndex = false; private StemMode stemMode = StemMode.NONE; private boolean isAttribute = false; private boolean isIndex = false; private boolean isDefaultPosition = false; private boolean dynamicSummary=false; private boolean highlightSummary=false; private boolean lowercase = false; private boolean plainTokens = false; private boolean multivalue = false; private boolean fastSearch = false; private boolean normalize = false; private boolean literalBoost = false; private boolean numerical = false; private boolean integer = false; private boolean string = false; private boolean predicate = false; private long predicateUpperBound = Long.MAX_VALUE; private long predicateLowerBound = Long.MIN_VALUE; /** True if this is an exact index - which should match tokens containing any characters */ private boolean exact = false; private boolean isNGram = false; private int gramSize = 2; /** Whether implicit phrases should lead to a phrase item or an and item. */ private Boolean phraseSegmenting = false; /** The string terminating an exact token in this index, or null to use the default (space) */ private String exactTerminator = null; /** Commands which are not converted into a field */ private final Set commands = new java.util.HashSet<>(); /** All the commands added to this, including those converted to fields above */ private final List allCommands = new java.util.ArrayList<>(); private static final String CMD_STRING = "string"; public Index(String name) { this.name = name; } public void addAlias(String alias) { aliases.add(alias); } /** Returns an unmodifiable list of the aliases of this index (not including the index proper name) */ public List aliases() { return Collections.unmodifiableList(aliases); } /** * Returns the canonical name of this index, unless it * is the null index, which doesn't have a canonical name */ public String getName() { return name; } public boolean isUriIndex() { return uriIndex; } public boolean isDefaultPosition() { return isDefaultPosition; } public void setDefaultPosition(boolean v) { isDefaultPosition = v; } public void setUriIndex(boolean uriIndex) { this.uriIndex = uriIndex; } public boolean isHostIndex() { return hostIndex; } public void setHostIndex(boolean hostIndex) { this.hostIndex = hostIndex; } public StemMode getStemMode() { return stemMode; } public void setStemMode(StemMode stemMode) { this.stemMode = stemMode; } public void setStemMode(String name) { this.stemMode = StemMode.valueOf(name); } /** Adds a type or untyped command string to this */ public Index addCommand(String command) { allCommands.add(command); if (command.startsWith("type tensor(") || command.startsWith("type tensor<")) { // TODO: Type info can replace numerical, predicate, multivalue setTensor(true); } else if (command.equals("integer")) { setInteger(true); } else if ("fullurl".equals(command)) { setUriIndex(true); } else if ("urlhost".equals(command)) { setHostIndex(true); } else if (command.startsWith("stem ")) { setStemMode(command.substring(5)); } else if (command.startsWith("stem:")) { setStemMode(command.substring(5)); } else if ("stem".equals(command)) { setStemMode(StemMode.SHORTEST); } else if ("word".equals(command)) { setExact(true, null); } else if ("exact".equals(command)) { setExact(true, " "); } else if ("dynteaser".equals(command)) { setDynamicSummary(true); } else if ("highlight".equals(command)) { setHighlightSummary(true); } else if ("lowercase".equals(command)) { setLowercase(true); } else if (command.startsWith("exact ")) { setExact(true, command.substring(6)); } else if (command.startsWith("ngram ")) { setNGram(true, Integer.parseInt(command.substring(6))); } else if (command.equals("attribute")) { setAttribute(true); } else if (command.equals("index")) { setIndex(true); } else if (command.equals("default-position")) { setDefaultPosition(true); } else if (command.equals("plain-tokens")) { setPlainTokens(true); } else if (command.equals("multivalue")) { setMultivalue(true); } else if (command.equals("fast-search")) { setFastSearch(true); } else if (command.equals("normalize")) { setNormalize(true); } else if (command.equals("literal-boost")) { setLiteralBoost(true); } else if (command.equals("numerical")) { setNumerical(true); } else if (command.equals("predicate")) { setPredicate(true); } else if (command.startsWith("predicate-bounds ")) { setPredicateBounds(command.substring(17)); } else if (command.equals("phrase-segmenting")) { setPhraseSegmenting(true); } else if (command.startsWith("phrase-segmenting ")) { setPhraseSegmenting(Boolean.parseBoolean(command.substring("phrase-segmenting ".length()))); } else if (command.equals(CMD_STRING)) { setString(true); } else { commands.add(command); } return this; } private void setTensor(boolean tensor) { this.tensor = tensor; } public boolean isTensor() { return tensor; } private void setPredicateBounds(String bounds) { if ( ! bounds.startsWith("[..")) { predicateLowerBound = Long.parseLong(bounds.substring(1, bounds.indexOf(".."))); } else { predicateLowerBound = Long.MIN_VALUE; } if ( ! bounds.endsWith("..]")) { predicateUpperBound = Long.parseLong(bounds.substring(bounds.indexOf("..") + 2, bounds.length() - 1)); } else { predicateUpperBound = Long.MAX_VALUE; } } /** Sets whether terms in this field are lowercased when indexing. */ public void setLowercase(boolean lowercase) { this.lowercase = lowercase; } /** Returns whether terms in this field are lowercased when indexing. */ public boolean isLowercase() { return lowercase; } /** Returns an iterator of all the untyped commands of this */ public Iterator commandIterator() { return commands.iterator(); } /** Checks whether this has the given (exact) untyped command string */ public boolean hasCommand(String commandString) { return commands.contains(commandString); } /** * Set whether this index should match any kind of characters * * @param exact true to make this index match any kind of characters, not just word and digit ones * @param terminator the terminator of an exact sequence (one or more characters), * or null to use the default (space) */ public void setExact(boolean exact, String terminator) { this.exact = exact; this.exactTerminator = terminator; } /** Returns whether this is an exact index, which should match tokens containing any characters */ public boolean isExact() { return exact; } /** Returns the string terminating an exact sequence in this index, or null to use the default (space) */ public String getExactTerminator() { return exactTerminator; } /** Returns true if this is an ngram index (default: false) */ public boolean isNGram() { return isNGram; } /** Returns the gram size. Only used if isNGram is true (default: 2)*/ public int getGramSize() { return gramSize; } public void setNGram(boolean nGram,int gramSize) { this.isNGram = nGram; this.gramSize = gramSize; } public void setDynamicSummary(boolean dynamicSummary) { this.dynamicSummary=dynamicSummary; } public boolean getDynamicSummary() { return dynamicSummary; } public void setHighlightSummary(boolean highlightSummary) { this.highlightSummary=highlightSummary; } public boolean getHighlightSummary() { return highlightSummary; } /** Returns true if this is the null index */ // TODO: Replace by == Index.null public boolean isNull() { return "(null)".equals(name); } public boolean isAttribute() { return isAttribute; } public void setAttribute(boolean isAttribute) { this.isAttribute = isAttribute; } public boolean isIndex() { return isIndex; } public void setIndex(boolean isIndex) { this.isIndex = isIndex; } public boolean hasPlainTokens() { return plainTokens; } public void setPlainTokens(boolean plainTokens) { this.plainTokens = plainTokens; } public void setMultivalue(boolean multivalue) { this.multivalue = multivalue; } /** Returns true if this is a multivalue field */ public boolean isMultivalue() { return multivalue; } public void setFastSearch(boolean fastSearch) { this.fastSearch = fastSearch; } /** Returns true if this is an attribute with fastsearch turned on */ public boolean isFastSearch() { return fastSearch; } public void setNormalize(boolean normalize) { this.normalize = normalize; } /** Returns true if the content of this index is normalized */ public boolean getNormalize() { return normalize; } public boolean getLiteralBoost() { return literalBoost; } public void setLiteralBoost(boolean literalBoost) { this.literalBoost = literalBoost; } public void setNumerical(boolean numerical) { this.numerical = numerical; } public boolean isNumerical() { return numerical; } public void setString(boolean string) { this.string = string; } public boolean isString() { return string; } public void setInteger(boolean integer) { this.integer = integer; } public boolean isInteger() { return integer; } public void setPredicate(boolean isPredicate) { this.predicate = isPredicate; } public boolean isPredicate() { return predicate; } public long getPredicateUpperBound() { return predicateUpperBound; } public long getPredicateLowerBound() { return predicateLowerBound; } public boolean getPhraseSegmenting() { return phraseSegmenting; } public boolean setPhraseSegmenting(boolean phraseSegmenting) { return this.phraseSegmenting = phraseSegmenting; } /** Returns all the literal command strings given as arguments to addCommand in this instance */ public List allCommands() { return allCommands; } @Override public String toString() { return "index '" + getName() + "'"; } }