summary refs log tree commit diff stats
path: root/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java')
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java 87
1 files changed, 87 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java b/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java
new file mode 100644
index 00000000000..12880d0e1c8
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java
@@ -0,0 +1,87 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.document;
+
+/**
+ * Describes how much character normalization is applied
+ * to query text and indexed text.
+ * Each level includes the work of the levels below it,
+ * so removing accents also implies codepoint
+ * normalizing and case normalizing.
+ */
+// TODO: Missing author
+public class NormalizeLevel {
+
+    /**
+     * The available levels, in increasing order:
+     * NONE: the input text is left unchanged
+     * CODEPOINT: the text is converted into Unicode
+     *            Normalization Form Compatibility Composition
+     * LOWERCASE: the text is also converted to lowercase letters
+     * ACCENT: both of the above, and accents are removed from characters
+     */
+    public enum Level {
+        NONE, CODEPOINT, LOWERCASE, ACCENT
+    }
+
+    private boolean userSpecified = false;
+    private Level level = Level.ACCENT;
+
+    /**
+     * Tells whether the current level includes accent removal.
+     */
+    public boolean doRemoveAccents() { return Level.ACCENT == level; }
+
+    /**
+     * Creates the default level: ACCENT, not user specified.
+     */
+    public NormalizeLevel() {}
+
+    /**
+     * Creates an instance holding the given level.
+     *
+     * @param level the level to start out with
+     * @param fromUser true if the level was specified by the user
+     */
+    public NormalizeLevel(Level level, boolean fromUser) {
+        this.userSpecified = fromUser;
+        this.level = level;
+    }
+
+    /**
+     * Lowers the level to CODEPOINT because some other
+     * feature needs it. Nothing happens if the level
+     * was specified by the user, and a level of NONE
+     * is kept as it is, so the level is never raised.
+     */
+    public void inferCodepoint() {
+        // a user specified level always wins over inferred changes,
+        // and NONE must stay NONE since the level is never raised
+        boolean adjustable = !userSpecified && level != Level.NONE;
+        if (adjustable) {
+            level = Level.CODEPOINT;
+        }
+    }
+
+    /**
+     * Lowers the level to LOWERCASE because some other
+     * feature needs it. Nothing happens if the level
+     * was specified by the user, and levels below
+     * LOWERCASE are kept, so the level is never raised.
+     */
+    public void inferLowercase() {
+        // a user specified level always wins over inferred changes
+        if (userSpecified) {
+            return;
+        }
+        // NONE and CODEPOINT are below LOWERCASE and must not be raised
+        boolean belowLowercase = level == Level.NONE || level == Level.CODEPOINT;
+        if (!belowLowercase) {
+            level = Level.LOWERCASE;
+        }
+    }
+
+    /**
+     * Returns the current normalization level.
+     */
+    public Level getLevel() { return level; }
+}