diff options
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java')
-rw-r--r-- | config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java b/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java new file mode 100644 index 00000000000..12880d0e1c8 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java @@ -0,0 +1,87 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +/** + * class representing the character normalization + * we want to do on query and indexed text. + * Levels are strict subsets, so doing accent + * removal means doing codepoint normalizing + * and case normalizing also. + */ +// TODO: Missing author +public class NormalizeLevel { + + /** + * The current levels are as follows: + * NONE: no changes to input text + * CODEPOINT: convert text into Unicode + * Normalization Form Compatibility Composition + * LOWERCASE: also convert text into lowercase letters + * ACCENT: do both above and remove accents on characters + */ + public enum Level { + NONE, CODEPOINT, LOWERCASE, ACCENT + } + + private boolean userSpecified = false; + private Level level = Level.ACCENT; + + /** + * Returns whether accents should be removed from text + */ + public boolean doRemoveAccents() { return level == Level.ACCENT; } + + /** + * Construct a default (full) normalizelevel, + */ + public NormalizeLevel() {} + + /** + * Construct for a specific level, possibly user specified + * + * @param level which level to use + * @param fromUser whether this was specified by the user + */ + public NormalizeLevel(Level level, boolean fromUser) { + this.level = level; + this.userSpecified = fromUser; + } + + /** + * Change the current level to CODEPOINT as inferred + * by other features' needs. If the current level + * was user specified it will not change; also this + * will not increase the level. + */ + public void inferCodepoint() { + if (userSpecified) { + // ignore inferred changes if user specified something + return; + } + // do not increase level + if (level != Level.NONE) level = Level.CODEPOINT; + } + + /** + * Change the current level to LOWERCASE as inferred + * by other features' needs. If the current level + * was user specified it will not change; also this + * will not increase the level. + */ + public void inferLowercase() { + if (userSpecified) { + // ignore inferred changes if user specified something + return; + } + // do not increase level + if (level == Level.NONE) return; + if (level == Level.CODEPOINT) return; + + level = Level.LOWERCASE; + } + + public Level getLevel() { + return level; + } + +} |