SpareCapacityMaintainer sketch

author: Jon Bratseth <bratseth@gmail.com> 2020-06-12 12:51:22 +0200
committer: Jon Bratseth <bratseth@gmail.com> 2020-06-12 12:51:22 +0200
commit: 7f7b6777514bf05916e2edcbc3e27b1bfd28906c (patch)
tree: c530cbc56b80eb5128d2d9254b92c0486923f0d4 /linguistics
parent: 9fc05281d6a79c26efe04edeb7604300f0c05845 (diff)
6 files changed, 35 insertions, 66 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
index d3f6fcf2ee3..93599fa7dbe 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
@@ -15,6 +15,7 @@ import java.util.logging.Logger;
 import java.util.logging.Level;
 
 public class OpenNlpTokenizer implements Tokenizer {
+
     private final static int SPACE_CODE = 32;
     private static final Logger log = Logger.getLogger(OpenNlpTokenizer.class.getName());
     private final Normalizer normalizer;
diff --git a/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java b/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java
index ce0291c85e5..59ae664e79e 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/CharacterClasses.java
@@ -17,7 +17,7 @@ public class CharacterClasses {
         if (Character.isDigit(c) &&  ! isLatin(c)) return true; // Not considering these digits, so treat them as letters
         // if (c == '_') return true;
 
-        // Ticket 3864695, some CJK punctuation YST defined as word characters
+        // Some CJK punctuation defined as word characters
         if (c == '\u3008' || c == '\u3009' || c == '\u300a' || c == '\u300b' ||
             c == '\u300c' || c == '\u300d' || c == '\u300e' ||
             c == '\u300f' || c == '\u3010' || c == '\u3011') {
@@ -52,4 +52,5 @@ public class CharacterClasses {
     public boolean isLetterOrDigit(int c) {
         return isLetter(c) || isDigit(c);
     }
+
 }
diff --git a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java
index 94fd0e08493..aa7ae59edf9 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java
@@ -39,12 +39,8 @@ public class GramSplitter {
      * @throws IllegalArgumentException if n is less than 1
      */
     public GramSplitterIterator split(String input, int n) {
-        if (input == null) {
-            throw new NullPointerException("input cannot be null");
-        }
-        if (n < 1) {
-            throw new IllegalArgumentException("n (gram size) cannot be smaller than 1, was " + n);
-        }
+        if (input == null) throw new NullPointerException("input cannot be null");
+        if (n < 1) throw new IllegalArgumentException("n (gram size) cannot be smaller than 1, was " + n);
         return new GramSplitterIterator(input, n, characterClasses);
     }
 
@@ -52,29 +48,19 @@ public class GramSplitter {
 
         private final CharacterClasses characterClasses;
 
-        /**
-         * Text to split
-         */
+        /** Text to split */
         private final String input;
 
-        /**
-         * Gram size
-         */
+        /** Gram size */
         private final int n;
 
-        /**
-         * Current index
-         */
+        /** Current index */
         private int i = 0;
 
-        /**
-         * Whether the last thing that happened was being on a separator (including the start of the string)
-         */
+        /** Whether the last thing that happened was being on a separator (including the start of the string) */
         private boolean isFirstAfterSeparator = true;
 
-        /**
-         * The next gram or null if not determined yet
-         */
+        /** The next gram or null if not determined yet */
         private Gram nextGram = null;
 
         public GramSplitterIterator(String input, int n, CharacterClasses characterClasses) {
@@ -85,9 +71,7 @@ public class GramSplitter {
 
         @Override
         public boolean hasNext() {
-            if (nextGram != null) {
-                return true;
-            }
+            if (nextGram != null) return true;
             nextGram = findNext();
             return nextGram != null;
         }
@@ -95,12 +79,10 @@ public class GramSplitter {
         @Override
         public Gram next() {
             Gram currentGram = nextGram;
-            if (currentGram == null) {
+            if (currentGram == null)
                 currentGram = findNext();
-            }
-            if (currentGram == null) {
+            if (currentGram == null)
                 throw new NoSuchElementException("No next gram at position " + i);
-            }
             nextGram = null;
             return currentGram;
         }
@@ -111,24 +93,21 @@ public class GramSplitter {
                 i++;
                 isFirstAfterSeparator = true;
             }
-            if (i >= input.length()) {
-                return null;
-            }
+            if (i >= input.length()) return null;
 
             String gram = input.substring(i, Math.min(i + n, input.length()));
             int nonWordChar = indexOfNonWordChar(gram);
-            if (nonWordChar == 0) {
-                throw new RuntimeException("Programming error");
-            }
-            if (nonWordChar > 0) {
+            if (nonWordChar == 0) throw new RuntimeException("Programming error");
+
+            if (nonWordChar > 0)
                 gram = gram.substring(0, nonWordChar);
-            }
 
             if (gram.length() == n) { // normal case: got a full length gram
                 i++;
                 isFirstAfterSeparator = false;
                 return new Gram(i - 1, gram.length());
-            } else { // gram is too short due either to a non-word separator or end of string
+            }
+            else { // gram is too short due either to a non-word separator or end of string
                 if (isFirstAfterSeparator) { // make a gram anyway
                     i++;
                     isFirstAfterSeparator = false;
@@ -143,9 +122,8 @@ public class GramSplitter {
 
         private int indexOfNonWordChar(String s) {
             for (int i = 0; i < s.length(); i++) {
-                if (!characterClasses.isLetterOrDigit(s.codePointAt(i))) {
+                if ( ! characterClasses.isLetterOrDigit(s.codePointAt(i)))
                     return i;
-                }
             }
             return -1;
         }
@@ -162,9 +140,8 @@ public class GramSplitter {
          */
         public List<String> toExtractedList() {
             List<String> gramList = new ArrayList<>();
-            while (hasNext()) {
+            while (hasNext())
                 gramList.add(next().extractFrom(input));
-            }
             return Collections.unmodifiableList(gramList);
         }
     }
@@ -189,31 +166,19 @@ public class GramSplitter {
             return length;
         }
 
-        /**
-         * Returns this gram as a string from the input string
-         */
+        /** Returns this gram as a string from the input string */
         public String extractFrom(String input) {
             return input.substring(start, start + length);
         }
 
         @Override
         public boolean equals(Object o) {
-            if (this == o) {
-                return true;
-            }
-            if (!(o instanceof Gram)) {
-                return false;
-            }
+            if (this == o) return true;
+            if ( ! (o instanceof Gram)) return false;
 
             Gram gram = (Gram)o;
-
-            if (length != gram.length) {
-                return false;
-            }
-            if (start != gram.start) {
-                return false;
-            }
-
+            if (length != gram.length) return false;
+            if (start != gram.start) return false;
             return true;
         }
 
@@ -223,5 +188,7 @@ public class GramSplitter {
             result = 31 * result + length;
             return result;
         }
+
     }
+
 }
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Normalizer.java b/linguistics/src/main/java/com/yahoo/language/process/Normalizer.java
index 0e34f88f4ca..044d249f077 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Normalizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Normalizer.java
@@ -9,11 +9,11 @@ package com.yahoo.language.process;
 public interface Normalizer {
 
     /**
-     * <p>NFKC normalizes a String.</p>
+     * NFKC normalizes a String.
      *
-     * @param input String to normalize.
-     * @return The normalized String.
-     * @throws ProcessingException If underlying library throws an Exception.
+     * @param input the string to normalize
+     * @return the normalized string
+     * @throws ProcessingException if underlying library throws an Exception
      */
     String normalize(String input);
 
diff --git a/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java b/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java
index 941afa07347..752992f5a26 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java
@@ -2,7 +2,7 @@
 package com.yahoo.language.process;
 
 /**
- * <p>Exception class indicating that a fatal error occured during linguistic processing.</p>
+ * Exception class indicating that a fatal error occurred during linguistic processing.
  *
  * @author Simon Thoresen Hult
  */
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Transformer.java b/linguistics/src/main/java/com/yahoo/language/process/Transformer.java
index 46f3c060d4e..4927edc98c9 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Transformer.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Transformer.java
@@ -13,8 +13,8 @@ public interface Transformer {
     /**
      * Remove accents from input text.
      *
-     * @param input    text to transform.
-     * @param language language of input text.
+     * @param input    text to transform
+     * @param language language of input text
      * @return text with accents removed, or input-text if the feature is unavailable
      * @throws ProcessingException thrown if there is an exception stemming this input
      */
author	Jon Bratseth <bratseth@gmail.com>	2020-06-12 12:51:22 +0200
committer	Jon Bratseth <bratseth@gmail.com>	2020-06-12 12:51:22 +0200
commit	7f7b6777514bf05916e2edcbc3e27b1bfd28906c (patch)
tree	c530cbc56b80eb5128d2d9254b92c0486923f0d4 /linguistics
parent	9fc05281d6a79c26efe04edeb7604300f0c05845 (diff)