summaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/text
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /container-search/src/main/java/com/yahoo/text
Publish
Diffstat (limited to 'container-search/src/main/java/com/yahoo/text')
-rw-r--r--container-search/src/main/java/com/yahoo/text/interpretation/AnnotationClass.java35
-rw-r--r--container-search/src/main/java/com/yahoo/text/interpretation/Annotations.java124
-rw-r--r--container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java404
-rw-r--r--container-search/src/main/java/com/yahoo/text/interpretation/Modification.java43
-rw-r--r--container-search/src/main/java/com/yahoo/text/interpretation/Span.java349
-rw-r--r--container-search/src/main/java/com/yahoo/text/interpretation/package-info.java10
6 files changed, 965 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/AnnotationClass.java b/container-search/src/main/java/com/yahoo/text/interpretation/AnnotationClass.java
new file mode 100644
index 00000000000..60dd24c5ccc
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/text/interpretation/AnnotationClass.java
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.text.interpretation;
+
+// TODO: Eventually we need to define the set of classes available here
+
+/**
+ * The class (type) of an annotation, identified by a name such as "person" or "token".
+ * <p>
+ * Two annotation classes are equal when their names are equal; a null name is only equal to
+ * another null name. Instances are immutable and usable as hash keys.
+ */
+public class AnnotationClass {
+
+    private final String clazz;
+
+    /**
+     * Creates an annotation class.
+     *
+     * @param clazz the name identifying this class; null is accepted
+     */
+    public AnnotationClass(String clazz) {
+        this.clazz = clazz;
+    }
+
+    /** Returns the name given at construction, which may be null */
+    public String getClazz() {
+        return clazz;
+    }
+
+    /**
+     * Returns true if the argument is an AnnotationClass with the same name as this.
+     * Both sides are compared null-safely, so an instance with a null name never throws here.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (!(o instanceof AnnotationClass)) {
+            return false;
+        }
+        String otherName = ((AnnotationClass) o).clazz;
+        // Guard on our own field: it is the receiver of equals() below
+        return clazz == null ? otherName == null : clazz.equals(otherName);
+    }
+
+    /** Returns the hash code of the name, or 0 when the name is null */
+    @Override
+    public int hashCode() {
+        return clazz == null ? 0 : clazz.hashCode();
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Annotations.java b/container-search/src/main/java/com/yahoo/text/interpretation/Annotations.java
new file mode 100644
index 00000000000..759cf2b173b
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/text/interpretation/Annotations.java
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.text.interpretation;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The key/value properties attached to one {@link Span} for one annotation class.
+ * <p>
+ * The property map is created lazily by the first call to {@link #put}, so an instance
+ * without properties does not allocate a map.
+ *
+ * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a>
+ */
+public class Annotations {
+
+    private final Span span;
+
+    /** The properties of this annotation, or null until the first call to {@link #put} */
+    protected Map<String,Object> annotations;
+
+    /**
+     * Creates an empty set of properties for the given span.
+     *
+     * @param span the span these properties describe
+     */
+    public Annotations(Span span) {
+        this.span = span;
+    }
+
+    /**
+     * Sets a property, replacing any value previously stored under the same key.
+     */
+    public void put(String key, Object o) {
+        if (annotations == null) {
+            annotations = new HashMap<>();
+        }
+        annotations.put(key, o);
+    }
+
+    /**
+     * Returns the properties of this annotation, never null.
+     * <p>
+     * When at least one property has been set this is the live backing map; otherwise it is
+     * the immutable empty map. Callers should therefore treat the result as read-only.
+     */
+    public Map<String,Object> getMap() {
+        return annotations == null ? Collections.<String,Object>emptyMap() : annotations;
+    }
+
+    /** Returns the value stored under the key, or null if there is none */
+    public Object get(String key) {
+        return getMap().get(key);
+    }
+
+    /**
+     * The span that this annotation is for.
+     */
+    public Span getSpan() {
+        return span;
+    }
+
+    /**
+     * The text this annotation is for, i.e. the text of its span.
+     */
+    public String getSubString() {
+        return span.getText();
+    }
+
+    /**
+     * Helper function to get a Double property.
+     * <p>
+     * If the stored value is a <code>Number</code>, its doubleValue() is returned. Any other non-null
+     * value is converted with Double.parseDouble() applied to its string representation.
+     *
+     * @return the value as a Double, or null if no value is stored under the key
+     * @throws NumberFormatException if the string representation is not parseable as a double
+     */
+    public Double getDouble(String key) {
+        Object o = getMap().get(key);
+        if (o == null) {
+            return null;
+        }
+        if (o instanceof Number) {
+            return ((Number) o).doubleValue();
+        }
+        return Double.parseDouble(o.toString());
+    }
+
+    /**
+     * Helper function to get a String property: the <code>toString()</code> of the stored value.
+     *
+     * @return the string form of the value, or null if no value is stored under the key
+     */
+    public String getString(String key) {
+        Object o = getMap().get(key);
+        return o == null ? null : o.toString();
+    }
+
+    /**
+     * Helper function to get an Integer property.
+     * <p>
+     * If the stored value is a <code>Number</code>, its intValue() is returned (so a fractional value
+     * is converted the way intValue() of that Number type defines). Any other non-null value is
+     * converted with Integer.parseInt() applied to its string representation.
+     *
+     * @return the value as an Integer, or null if no value is stored under the key
+     * @throws NumberFormatException if the string representation is not parseable as an int
+     */
+    public Integer getInteger(String key) {
+        Object o = getMap().get(key);
+        if (o == null) {
+            return null;
+        }
+        if (o instanceof Number) {
+            return ((Number) o).intValue();
+        }
+        return Integer.parseInt(o.toString());
+    }
+
+    /**
+     * Helper function to get a Boolean property. No conversion is attempted.
+     *
+     * @return the stored value if it is a Boolean, or null if it is missing or of any other type
+     */
+    public Boolean getBoolean(String key) {
+        Object o = getMap().get(key);
+        // instanceof is false for null, so no separate null check is needed
+        return (o instanceof Boolean) ? (Boolean) o : null;
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java b/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java
new file mode 100644
index 00000000000..d80ff80f172
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java
@@ -0,0 +1,404 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.text.interpretation;
+
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * An interpretation of a text.
+ * <p>
+ * This is the main class to use when querying and setting annotations for a text, as long as
+ * modifying the span tree directly is not needed.
+ * <p>
+ * The interpretation consists of a tree of annotations, with the nodes of the tree being Spans.
+ * An annotation is defined by its annotationClass ("person"), and by a key/value map of
+ * parameters for that annotationClass (if the person is an actor or other notable person).
+ * <p>
+ * Most methods here are thin wrappers delegating to the root {@link Span}.
+ *
+ * @see Span
+ * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a>
+ */
+public class Interpretation {
+
+    public final static AnnotationClass INTERPRETATION_CLASS = new AnnotationClass("interpretation");
+
+    private final Modification modification;
+    private final Span rootSpan;
+    private double probability;
+
+    /**
+     * Creates a new interpretation and a new modification from the text,
+     * with the probability set to the default value (0.0).
+     */
+    public Interpretation(String text) {
+        this(text, 0.0);
+    }
+
+    /**
+     * Creates a new interpretation and a new modification from the text, with the given probability.
+     */
+    public Interpretation(String text, double probabilty) {
+        this(new Modification(text), probabilty);
+    }
+
+    /**
+     * Creates a new interpretation based on the modification, with the probability set to the
+     * default value (0.0).
+     */
+    public Interpretation(Modification modification) {
+        this(modification, 0.0);
+    }
+
+    /**
+     * Creates an interpretation based on the modification given. A new root span covering the
+     * whole text of the modification is created.
+     *
+     * @param modification the modification this interprets
+     * @param probability the probability of this interpretation, clamped as by {@link #setProbability}
+     */
+    public Interpretation(Modification modification, double probability) {
+        this.modification = modification;
+        this.rootSpan = new Span(modification);
+        setProbability(probability);
+    }
+
+    /** Returns the modification this interpretation was created from */
+    public Modification getModification() {
+        return modification;
+    }
+
+    /**
+     * The probability that this interpretation is correct.
+     *
+     * @return a value between 0.0 and 1.0 that gives the probability that this interpretation is correct
+     */
+    public double getProbability() {
+        return probability;
+    }
+
+    /**
+     * Sets the probability that this interpretation is the correct one.
+     * <p>
+     * The value is clamped to the range [0.0, 1.0]: negative values are stored as 0.0 and values
+     * above 1.0 are stored as 1.0. (A NaN argument fails both range comparisons and is stored as is.)
+     * <p>
+     * The value is used when sorting interpretations.
+     */
+    public void setProbability(double probability) {
+        if (probability < 0) {
+            probability = 0.0;
+        } else if (probability > 1.0) {
+            probability = 1.0;
+        }
+        this.probability = probability;
+    }
+
+    /** Returns the root of the tree representation of the interpretation */
+    public Span root() { return rootSpan; }
+
+
+    // Wrapper methods for Span
+
+    /**
+     * Returns the annotation of the given class on the whole text, creating it if necessary.
+     *
+     * @param annotationClass the class of the annotation
+     */
+    public Annotations annotate(String annotationClass) {
+        return annotate(new AnnotationClass(annotationClass));
+    }
+
+    /**
+     * Returns the annotation of the given class on the whole text, creating it if necessary.
+     *
+     * @param annotationClass the class of the annotation
+     */
+    public Annotations annotate(AnnotationClass annotationClass) {
+        return rootSpan.annotate(annotationClass);
+    }
+
+    /**
+     * Sets a key/value pair for an annotation. If an annotation of the class does not
+     * exist, a new one is created.
+     * <p>
+     * A shortcut for annotate(annotationClass).put(key,value)
+     *
+     * @param annotationClass class of the annotation
+     * @param key key of the property to set on the annotation
+     * @param value value of the property to set on the annotation
+     */
+    public void annotate(String annotationClass, String key, Object value) {
+        annotate(new AnnotationClass(annotationClass)).put(key, value);
+    }
+
+    /**
+     * Sets a key/value pair for an annotation. If an annotation of the class does not
+     * exist, a new one is created.
+     * <p>
+     * A shortcut for annotate(annotationClass).put(key,value)
+     *
+     * @param annotationClass class of the annotation
+     * @param key key of the property to set on the annotation
+     * @param value value of the property to set on the annotation
+     */
+    public void annotate(AnnotationClass annotationClass, String key, Object value) {
+        annotate(annotationClass).put(key, value);
+    }
+
+    /**
+     * Returns the annotation of the given class on a substring, creating it if necessary.
+     *
+     * @param from start of the substring
+     * @param to end of the substring
+     * @param annotationClass class of the annotation
+     */
+    public Annotations annotate(int from, int to, String annotationClass) {
+        return annotate(from, to, new AnnotationClass(annotationClass));
+    }
+
+    /**
+     * Returns the annotation of the given class on a substring, creating it if necessary.
+     *
+     * @param from start of the substring
+     * @param to end of the substring
+     * @param annotationClass class of the annotation
+     */
+    public Annotations annotate(int from, int to, AnnotationClass annotationClass) {
+        return rootSpan.annotate(from, to, annotationClass);
+    }
+
+    /**
+     * Sets a key/value pair for an annotation of a substring. If an annotation of the class
+     * does not exist, a new one is created.
+     * <p>
+     * A shortcut for annotate(from, to, annotationClass).put(key, value)
+     *
+     * @param from start of the substring
+     * @param to end of the substring
+     * @param annotationClass class of the annotation
+     * @param key key of property to set on annotation
+     * @param value value of property to set on annotation
+     */
+    public void annotate(int from, int to, String annotationClass, String key, Object value) {
+        annotate(from, to, new AnnotationClass(annotationClass)).put(key, value);
+    }
+
+    /**
+     * Sets a key/value pair for an annotation of a substring. If an annotation of the class
+     * does not exist, a new one is created.
+     * <p>
+     * A shortcut for annotate(from, to, annotationClass).put(key, value)
+     *
+     * @param from start of the substring
+     * @param to end of the substring
+     * @param annotationClass class of the annotation
+     * @param key key of property to set on annotation
+     * @param value value of property to set on annotation
+     */
+    public void annotate(int from, int to, AnnotationClass annotationClass, String key, Object value) {
+        annotate(from, to, annotationClass).put(key, value);
+    }
+
+    /**
+     * Gets all annotations in the text, grouped by class. This also includes all subannotations,
+     * even those that override their parents.
+     */
+    public Map<AnnotationClass,List<Annotations>> getAll() {
+        return rootSpan.getAllAnnotations();
+    }
+
+    /**
+     * Returns a list of all annotations of the given class that exist in the text. This also includes
+     * all subannotations, even those that override their parents.
+     * If there are none, an empty list is returned, never null. The returned list should not be modified.
+     */
+    public List<Annotations> getAll(String annotationClass) {
+        return getAll(new AnnotationClass(annotationClass));
+    }
+
+    /**
+     * Returns a list of all annotations of the given class that exist in the text. This also includes
+     * all subannotations, even those that override their parents.
+     * If there are none, an empty list is returned, never null. The returned list should not be modified.
+     */
+    public List<Annotations> getAll(AnnotationClass annotationClass) {
+        // TODO: This implementation is very inefficient because it unnecessarily collects for all classes
+        List<Annotations> ofClass = getAll().get(annotationClass);
+        return ofClass != null ? ofClass : Collections.<Annotations>emptyList();
+    }
+
+    /**
+     * Returns the annotation marked with the annotationClass.
+     * <p>
+     * This is different from annotate(annotationClass) because a new annotation
+     * will not be created if it does not exist.
+     *
+     * @param annotationClass class of the annotation
+     * @return an annotation with the given class, null if it does not exist
+     */
+    public Annotations get(String annotationClass) {
+        return get(new AnnotationClass(annotationClass));
+    }
+
+    /**
+     * Returns the annotation marked with the annotationClass.
+     * <p>
+     * This is different from annotate(annotationClass) because a new annotation
+     * will not be created if it does not exist.
+     *
+     * @param annotationClass class of the annotation
+     * @return an annotation with the given class, null if it does not exist
+     */
+    public Annotations get(AnnotationClass annotationClass) {
+        return rootSpan.getAnnotation(annotationClass);
+    }
+
+    /**
+     * Gets the value of a property set on an annotation.
+     * If the annotation or the key/value pair does not exist, null is returned.
+     */
+    public Object get(String annotationClass, String key) {
+        return get(new AnnotationClass(annotationClass), key);
+    }
+
+    /**
+     * Gets the value of a property set on an annotation.
+     * If the annotation or the key/value pair does not exist, null is returned.
+     */
+    public Object get(AnnotationClass annotationClass, String key) {
+        Annotations annotations = get(annotationClass);
+        return annotations == null ? null : annotations.get(key);
+    }
+
+    /**
+     * Equivalent to <code>get(from,to,new AnnotationClass(annotationClass))</code>
+     */
+    public Annotations get(int from, int to, String annotationClass) {
+        return get(from, to, new AnnotationClass(annotationClass));
+    }
+
+    /**
+     * Gets an annotation that is set on a substring.
+     * <p>
+     * This function first tries to find an annotation of annotationClass that
+     * describes the range (from,to). If that does not exist, it tries to find the smallest range
+     * which both contains (from,to) and has an annotation of annotationClass.
+     * If that does not exist, null is returned.
+     * <p>
+     * For example, if these annotations have been set for the text "new york city":
+     * <pre>
+     * i.annotate(0,3,"token")  //new
+     * i.annotate(4,8,"token")  //york
+     * i.annotate(9,13,"token") //city
+     * i.annotate(0,8,"city")   //new york
+     * i.annotate(0,13,"city")  //new york city
+     * </pre>
+     * then:
+     * <pre>
+     * i.get(0,3,"token") //returns "token" - annotation for "new"
+     * i.get(0,3,"city")  //returns "city" - annotation for "new york"
+     * i.get(9,13,"city") //returns "city" - annotation for "new york city"
+     * </pre>
+     *
+     * @param from start of the substring
+     * @param to end of the substring
+     * @param annotationClass class of the annotation
+     * @return the annotation found, or null if there is none
+     */
+    public Annotations get(int from, int to, AnnotationClass annotationClass) {
+        return rootSpan.getAnnotation(from, to, annotationClass);
+    }
+
+    /**
+     * Gets the value of a property set on a substring annotation.
+     * <p>
+     * If the annotation or the key/value pair does not exist, null is returned.
+     */
+    public Object get(int from, int to, String annotationClass, String key) {
+        Annotations annotations = get(from, to, annotationClass);
+        return annotations == null ? null : annotations.get(key);
+    }
+
+    /**
+     * Gets all the annotation classes that describe the text.
+     */
+    public Set<AnnotationClass> getClasses() {
+        return rootSpan.getClasses();
+    }
+
+    /**
+     * Gets all annotation classes that describe a substring.
+     */
+    public Set<AnnotationClass> getClasses(int from, int to) {
+        return rootSpan.getClasses(from, to);
+    }
+
+    /**
+     * Gets the lowermost spans (usually the spans marked with token).
+     */
+    public List<Span> getTokens() {
+        return rootSpan.getTokens();
+    }
+
+    /**
+     * Returns all spans that consist of the term given. If no span with that term exists,
+     * the empty list is returned.
+     */
+    public List<Span> getTermSpans(String term) {
+        return rootSpan.getTermSpans(term);
+    }
+
+    /**
+     * Returns a textual dump of all annotations, one bracketed group per annotation class, each
+     * listing the quoted annotated text followed by its properties (if any).
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        String classSeparator = "";
+        for (Map.Entry<AnnotationClass, List<Annotations>> entry : getAll().entrySet()) {
+            // NOTE(review): AnnotationClass does not override toString() in the visible source, so the
+            // key renders with the default Object representation - consider appending getClazz() instead
+            sb.append(classSeparator).append(entry.getKey()).append(" : [");
+            String annotationSeparator = "";
+            for (Annotations annotation : entry.getValue()) {
+                sb.append(annotationSeparator);
+                sb.append("\"").append(annotation.getSubString()).append("\"");
+                dumpAnnotation(sb, annotation);
+                annotationSeparator = ",";
+            }
+            sb.append("]");
+            classSeparator = ", ";
+        }
+        // NOTE(review): this closing parenthesis has no matching opening one. It is kept so that
+        // the output stays unchanged - confirm nothing depends on it before removing
+        sb.append(")");
+        return sb.toString();
+    }
+
+    /** Appends the properties of the annotation as " : {key : value, ...}", or nothing if it has none */
+    private void dumpAnnotation(StringBuilder sb, Annotations annotations) {
+        Map<String,Object> properties = annotations.getMap();
+        if (properties.isEmpty()) {
+            return;
+        }
+        sb.append(" : {");
+        String separator = "";
+        for (Map.Entry<String,Object> property : properties.entrySet()) {
+            sb.append(separator);
+            sb.append(property.getKey()).append(" : ").append(property.getValue());
+            separator = ", ";
+        }
+        sb.append("}");
+    }
+}
diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java b/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java
new file mode 100644
index 00000000000..ab92b6de961
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.text.interpretation;
+
+import java.util.HashMap;
+
+/**
+ * A modification of a text.
+ * <p>
+ * This class represents a possible rewrite of an original text. Reasons for a rewrite may be
+ * possible spelling errors in the text, or query expansion.
+ * <p>
+ * Being a HashMap, a modification can also carry arbitrary key/value data of its own.
+ *
+ * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a>
+ */
+public class Modification extends HashMap<String,Object> {
+
+    public final static AnnotationClass MODIFICATION_CLASS = new AnnotationClass("modification");
+
+    private static final long serialVersionUID = -8522335044460396296L;
+
+    private final String text;
+    private final Annotations annotations;
+
+    /**
+     * Creates a modification of the given text, with an (initially empty) annotation of class
+     * {@link #MODIFICATION_CLASS} covering the entire text.
+     *
+     * @param text the modified text
+     */
+    public Modification(String text) {
+        // The text must be assigned first: the span constructor reads it back through getText()
+        this.text = text;
+        this.annotations = new Span(this).annotate(MODIFICATION_CLASS);
+    }
+
+    /** Returns the text of this modification */
+    public String getText() {
+        return text;
+    }
+
+    /** Returns the modification annotation created for the whole text at construction */
+    public Annotations getAnnotation() {
+        return annotations;
+    }
+
+}
+
diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Span.java b/container-search/src/main/java/com/yahoo/text/interpretation/Span.java
new file mode 100644
index 00000000000..39457a0fc99
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/text/interpretation/Span.java
@@ -0,0 +1,349 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.text.interpretation;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+
+/**
+ * Span is a description of a part of a text, modeled as a tree.
+ * <p>
+ * A span is defined by the range (from,to) and by a set of annotations on that range. It also contains a set
+ * of child nodes that all have the restriction
+ * <code>child.from &gt;= parent.from &amp;&amp; child.to &lt;= parent.to &amp;&amp; (child.to-child.from) &lt; (parent.to-parent.from)</code>
+ * This means that all spans on a text can be modeled as a tree, where all child spans are guaranteed to be contained
+ * inside their parent span.
+ * <p>
+ * A span will usually be used indirectly through Interpretation.
+ *
+ * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a>
+ */
+public class Span {
+
+    private final Modification modification;
+    private List<Span> subSpans = null; //Lazy because of a large number of leaf nodes
+    private final Map<AnnotationClass, Annotations> annotations = new HashMap<>();
+    private Span parent; //Yes, this _should_ be final, but might be changed when adding an annotation
+    private final int from;
+    private final int to;
+
+    /**
+     * Creates a new root span covering the whole text of the modification
+     */
+    Span(final Modification modification) {
+        this.modification = modification;
+        this.parent = null;
+        this.from = 0;
+        this.to = modification.getText().length();
+    }
+
+    //This constructor is private to ensure that all child spans for a span is contained inside it.
+    private Span(int from, int to, Span parent) {
+        this.parent = parent;
+        this.modification = parent.modification;
+        this.from = from;
+        this.to = to;
+    }
+
+    /**
+     * Returns the part of the text that this span covers
+     */
+    public String getText() {
+        return modification.getText().substring(from, to);
+    }
+
+    @Override
+    public String toString() {
+        return "SPAN: " + getText();
+    }
+
+    /**
+     * Returns the annotation of the given class set directly on this span, creating it if it does not exist
+     */
+    public Annotations annotate(AnnotationClass clazz) {
+        Annotations existing = annotations.get(clazz);
+        if (existing != null) {
+            return existing;
+        }
+        Annotations created = new Annotations(this);
+        annotations.put(clazz, created);
+        return created;
+    }
+
+    /**
+     * Returns the annotation of the given class for exactly the range (from,to), creating it
+     * (and a span for that range, if needed) when it does not exist.
+     *
+     * @throws RuntimeException if the range partially overlaps an existing span
+     */
+    public Annotations annotate(int from, int to, AnnotationClass clazz) {
+        return addAnnotation(from, to, clazz);
+    }
+
+    /**
+     * Returns all annotations that are set on either this span or any span below it, grouped by class
+     */
+    public Map<AnnotationClass, List<Annotations>> getAllAnnotations() {
+        Map<AnnotationClass, List<Annotations>> result = new HashMap<>();
+        getAllAnnotations(result);
+        return result;
+    }
+
+    /**
+     * Returns all spans, either this or any span below it, whose text equals the given term, ignoring case
+     */
+    public List<Span> getTermSpans(String term) {
+        List<Span> spans = new ArrayList<>();
+        getTermSpans(term, spans);
+        return spans;
+    }
+
+    /**
+     * Returns the annotations with a specific class for the area defined by this span
+     * <p>
+     * This function will query its parent to find any annotation that is set for an area that this span is contained
+     * in. If there are conflicts (several annotations defined with the same annotation class), the annotation
+     * that is defined for the smallest area (furthest down in the tree), is used.
+     *
+     * @return the annotation, or null if no span containing this range has an annotation of the class
+     */
+    public Annotations getAnnotation(AnnotationClass clazz) {
+        return getAnnotation(from, to, clazz);
+    }
+
+    /**
+     * Returns the annotations with a specific class for the area defined by (from,to).
+     * <p>
+     * This function will query its parent to find any annotation that is set for an area that this span is contained
+     * in. If there are conflicts (several annotations defined with the same annotation class), the annotation
+     * that is defined for the smallest area (furthest down in the tree), is used.
+     *
+     * @return the annotation, or null if no span containing the range has an annotation of the class
+     * @throws RuntimeException if (from,to) is not contained in the span
+     */
+    public Annotations getAnnotation(int from, int to, AnnotationClass clazz) {
+        requireWithinThisSpan(from, to);
+        // The search always starts from the root so that enclosing spans are considered too
+        if (parent != null) {
+            return parent.getAnnotation(from, to, clazz);
+        }
+        return getBestAnnotation(from, to, clazz);
+    }
+
+    /**
+     * Returns all AnnotationClasses that are defined for this span and any of its superspans.
+     */
+    public Set<AnnotationClass> getClasses() {
+        return getClasses(from, to);
+    }
+
+    /**
+     * Returns all AnnotationClasses that are defined on any span in the tree containing the range (from,to).
+     *
+     * @throws RuntimeException if (from,to) is not contained in the span
+     */
+    public Set<AnnotationClass> getClasses(int from, int to) {
+        requireWithinThisSpan(from, to);
+        if (parent != null) {
+            return parent.getClasses(from, to);
+        }
+        Set<AnnotationClass> classes = new HashSet<>();
+        getAnnotationClasses(from, to, classes);
+        return classes;
+    }
+
+    /**
+     * Returns an unmodifiable list of all leaf spans at or below this span. A span without
+     * children is itself a leaf.
+     */
+    public List<Span> getTokens() {
+        List<Span> spans = new ArrayList<>();
+        getTokens(spans);
+        return Collections.unmodifiableList(spans);
+    }
+
+    /**
+     * Returns true if the given class is among the classes returned by {@link #getClasses()}
+     */
+    public boolean hasClass(AnnotationClass clazz) {
+        return getClasses().contains(clazz);
+    }
+
+    /**
+     * Returns all spans that are direct children of this span. If the span is a leaf, the empty
+     * list will be returned. The list is unmodifiable.
+     */
+    public List<Span> getSubSpans() {
+        if (subSpans == null) {
+            return Collections.<Span>emptyList();
+        }
+        return Collections.unmodifiableList(subSpans);
+    }
+
+    /** Returns the offset in the text where this span starts (inclusive) */
+    public int getFrom() { return from; }
+
+    /** Returns the offset in the text where this span ends (exclusive) */
+    public int getTo() { return to; }
+
+    /** Throws if the range (from,to) reaches outside this span */
+    private void requireWithinThisSpan(int from, int to) {
+        if (from < this.from || to > this.to) {
+            throw new RuntimeException("Trying to get a range that is outside this span");
+        }
+    }
+
+    private void addSubSpan(Span span) {
+        if (subSpans == null) {
+            subSpans = new ArrayList<>();
+        }
+        subSpans.add(span);
+    }
+
+    /*
+     * How this works:
+     *
+     * If the range is exactly this span, the annotation is set (or found) on this span.
+     *
+     * Otherwise we check the existing children: a child partially overlapping the range is illegal,
+     * and a child containing the range is left to add the annotation itself.
+     *
+     * If neither applies a new child span is created for the range. Existing children that lie
+     * inside the new span are moved below it, and the new span is added last among our children.
+     *
+     * NOTE(review): the range is not checked against the bounds of this span here.
+     */
+    private Annotations addAnnotation(int from, int to, AnnotationClass clazz) {
+        if (equalsRange(from, to)) {
+            return annotate(clazz);
+        }
+
+        for (Span subSpan : getSubSpans()) {
+            if (subSpan.intersects(from, to)) {
+                throw new RuntimeException("Trying to add span that intersects already excisting span");
+            } else if (subSpan.contains(from, to)) {
+                return subSpan.addAnnotation(from, to, clazz);
+            }
+        }
+
+        //We now know that the new span becomes a direct child of this span
+        Span span = new Span(from, to, this);
+        Annotations nAnnotations = new Annotations(span);
+        span.annotations.put(clazz, nAnnotations);
+
+        //Children inside the new range are taken over by the new span. No child can contain
+        //the range at this point, as that was handled by the loop above.
+        if (subSpans != null) {
+            Iterator<Span> subIterator = subSpans.iterator();
+            while (subIterator.hasNext()) {
+                Span subSpan = subIterator.next();
+                if (subSpan.isInside(from, to)) {
+                    subSpan.parent = span;
+                    span.addSubSpan(subSpan);
+                    subIterator.remove();
+                }
+            }
+        }
+        addSubSpan(span);
+        return nAnnotations;
+    }
+
+    /** Returns true if the range (from,to) lies within this span, including when it is equal to it */
+    private boolean contains(int from, int to) {
+        return this.from <= from && this.to >= to;
+    }
+
+    /** Returns true if this span lies within the range (from,to), including when it is equal to it */
+    private boolean isInside(int from, int to) {
+        return this.from >= from && this.to <= to;
+    }
+
+    /** Returns true if this span and the range overlap partially, i.e. overlap without one containing the other */
+    private boolean intersects(int from, int to) {
+        boolean overlapsRangeStart = this.from < from && this.to > from && this.to < to;
+        boolean overlapsRangeEnd = this.from < to && this.to > to && this.from > from;
+        return overlapsRangeStart || overlapsRangeEnd;
+    }
+
+    private boolean equalsRange(int from, int to) {
+        return this.from == from && this.to == to;
+    }
+
+    /** Adds the annotations of this span, and then recursively those of its children, to the results */
+    private void getAllAnnotations(Map<AnnotationClass, List<Annotations>> results) {
+        for (Map.Entry<AnnotationClass, Annotations> entry : annotations.entrySet()) {
+            List<Annotations> ofClass = results.get(entry.getKey());
+            if (ofClass == null) {
+                ofClass = new ArrayList<>();
+                results.put(entry.getKey(), ofClass);
+            }
+            ofClass.add(entry.getValue());
+        }
+        for (Span subSpan : getSubSpans()) {
+            subSpan.getAllAnnotations(results);
+        }
+    }
+
+    private void getTermSpans(String term, List<Span> spans) {
+        if (term.equalsIgnoreCase(getText())) {
+            spans.add(this);
+        }
+        for (Span subSpan : getSubSpans()) {
+            subSpan.getTermSpans(term, spans);
+        }
+    }
+
+    /** Collects the classes set on this span and on every span below it that contains the range */
+    private void getAnnotationClasses(int from, int to, Set<AnnotationClass> classes) {
+        if (!contains(from, to)) {
+            return;
+        }
+        classes.addAll(annotations.keySet());
+        for (Span subSpan : getSubSpans()) {
+            subSpan.getAnnotationClasses(from, to, classes);
+        }
+    }
+
+    private void getTokens(List<Span> spans) {
+        List<Span> children = getSubSpans();
+        if (children.isEmpty()) {
+            spans.add(this);
+            return;
+        }
+        for (Span subSpan : children) {
+            subSpan.getTokens(spans);
+        }
+    }
+
+    /**
+     * Returns the annotation of the class from the span furthest down in this subtree which
+     * contains the range and has such an annotation, or null if there is none.
+     */
+    private Annotations getBestAnnotation(int from, int to, AnnotationClass clazz) {
+        if (!contains(from, to)) {
+            return null;
+        }
+        //First yourself, then the subs: a hit further down overrides our own
+        Annotations best = this.annotations.get(clazz);
+        for (Span subSpan : getSubSpans()) {
+            Annotations subAnnotations = subSpan.getBestAnnotation(from, to, clazz);
+            if (subAnnotations != null) {
+                best = subAnnotations;
+            }
+        }
+        return best;
+    }
+}
diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java b/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java
new file mode 100644
index 00000000000..902dc58d551
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Models a set of hierarchical annotations (typically produced by QLAS) of a natural language string.
+ */
+@ExportPackage
+@PublicApi
+package com.yahoo.text.interpretation;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;