// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.process; /** * A single token produced by the tokenizer. * * @author Mathias Mølster Lidal */ public interface Token { /** Returns the type of this token - word, space or punctuation etc. */ TokenType getType(); /** Returns the original form of this token */ String getOrig(); /** Returns the number of stem forms available for this token. */ int getNumStems(); /** Returns the stem at position i */ String getStem(int i); /** * Returns the number of components, if this token is a compound word * (e.g. german "kommunikationsfehler". Otherwise, return 0 * * @return number of components, or 0 if none */ int getNumComponents(); /** Returns a component token of this */ Token getComponent(int i); /** Returns the offset position of this token */ long getOffset(); /** Returns the script of this token */ TokenScript getScript(); /** * Returns the token string in a form suitable for indexing: The * most lowercased variant of the most processed token form available, * If called on a compound token this returns a lowercased form of the * entire word. * If this is a special token with a configured replacement, * this will return the replacement token. */ String getTokenString(); /** Returns whether this is an instance of a declared special token (e.g. c++) */ boolean isSpecialToken(); /** Whether this token should be indexed */ boolean isIndexable(); }