Improve javadoc

author: Jon Bratseth <bratseth@vespa.ai> 2024-05-14 20:29:05 -0600
committer: Jon Bratseth <bratseth@vespa.ai> 2024-05-14 20:29:05 -0600
commit: fae1697e7cddae0c4232862f18ad0f6f12201852 (patch)
tree: 5df21cf840be09cadf9d21544d2e605231239340
parent: 63c765e1e33e02cd28f15f1a7bfad01f5f63fd43 (diff)
2 files changed, 11 insertions, 10 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
index 7d258f71ebd..5e9c3dcc6ea 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
@@ -335,7 +335,7 @@ public abstract class AbstractParser implements CustomParser {
     /**
      * Segments a token
      *
-     * @param indexName the index name which preceeded this token, or null if none
+     * @param indexName the index name which preceded this token, or null if none
      * @param token the token to segment
      * @param quoted whether this segment is within quoted text
      * @return the resulting item
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Segmenter.java b/linguistics/src/main/java/com/yahoo/language/process/Segmenter.java
index 7e7ee44bf74..5240737ae45 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Segmenter.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Segmenter.java
@@ -6,22 +6,23 @@ import com.yahoo.language.Language;
 import java.util.List;
 
 /**
- * Interface providing segmentation, i.e. splitting of CJK character blocks into separate tokens. This is primarily a
- * convenience feature for users who don't need full tokenization (or who use a separate tokenizer and only need CJK
- * processing).
+ * A segmenter splits a string into separate segments (such as words) without applying any further
+ * processing (such as stemming) on each segment.
+ *
+ * This is useful when token processing should be done separately from segmentation, such as in
+ * linguistic processing of queries, where token processing depends on field settings in a specific
+ * schema, while segmentation only depends on language and happens before schema-specific processing.
  *
  * @author Mathias Mølster Lidal
  */
 public interface Segmenter {
 
     /**
-     * Split input-string into tokens, and returned a list of tokens in unprocessed form (i.e. lowercased, normalized
-     * and stemmed if applicable, see @link{StemMode} for list of stemming options). It is assumed that the input only
-     * contains word-characters, any punctuation and spacing tokens will be removed.
+     * Returns a list of segments produced from a string.
      *
-     * @param input the text to segment.
-     * @param language language of input text.
-     * @return the list of segments.
+     * @param input the text to segment
+     * @param language the language of the input text
+     * @return the resulting list of segments
      * @throws ProcessingException if an exception is encountered during processing
      */
     List<String> segment(String input, Language language);
author	Jon Bratseth <bratseth@vespa.ai>	2024-05-14 20:29:05 -0600
committer	Jon Bratseth <bratseth@vespa.ai>	2024-05-14 20:29:05 -0600
commit	fae1697e7cddae0c4232862f18ad0f6f12201852 (patch)
tree	5df21cf840be09cadf9d21544d2e605231239340
parent	63c765e1e33e02cd28f15f1a7bfad01f5f63fd43 (diff)