diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/text')
6 files changed, 965 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/AnnotationClass.java b/container-search/src/main/java/com/yahoo/text/interpretation/AnnotationClass.java new file mode 100644 index 00000000000..60dd24c5ccc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/AnnotationClass.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text.interpretation; + +// Names the class (type) of an annotation; equality and hash code are based on that name, which may be null. TODO: Javadoc +// TODO: Eventually we need to define the set of classes available here + +public class AnnotationClass { + + private String clazz; + + public AnnotationClass(String clazz) { + this.clazz = clazz; + } + + public String getClazz() { + return clazz; + } + + + @Override + public boolean equals(Object o) { + if (!(o instanceof AnnotationClass)) { + return false; + } + AnnotationClass aClass = (AnnotationClass)o; + return clazz == null ? aClass.clazz == null : clazz.equals(aClass.clazz); // null-safe in both directions + } + + @Override + public int hashCode() { + return clazz == null ? 0 : clazz.hashCode(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Annotations.java b/container-search/src/main/java/com/yahoo/text/interpretation/Annotations.java new file mode 100644 index 00000000000..759cf2b173b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/Annotations.java @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text.interpretation; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * An annotation is a description of an area of text, with a given class. 
For example, an annotation for the + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Annotations { + + + private Span span; + + protected Map<String,Object> annotations; + + + /** + * Adds a key/value pair to this set of annotations, creating the backing map on first use. + */ + public void put(String key,Object o) { + if(annotations == null) { + annotations = new HashMap<>(); + } + annotations.put(key,o); + } + + public Map<String,Object> getMap() { // returns the backing map itself, or an empty map if nothing has been put yet + if(annotations == null) { + return Collections.emptyMap(); + } else { + return annotations; + } + } + + public Annotations(Span span) { + this.span = span; + } + + public Object get(String key) { + return getMap().get(key); + } + + /** + * The span that this annotation is for. + */ + public Span getSpan() { + return span; + } + + /** + * The text this annotation is for. + */ + public String getSubString() { + return span.getText(); + } + + + /** + * Helper function to get a Double annotation. + * <p> + * This function first checks if the Object in a map is a <code>Number</code>, and then calls doubleValue() on it. + * If it is not, then Double.parseDouble() is called on the string representation of the object. If the string + * is not parseable as a double, a NumberFormatException is thrown. If there is no value for the key, null is returned. + */ + public Double getDouble(String key) { + Object o = getMap().get(key); + if(o instanceof Number) { + return ((Number)o).doubleValue(); + } else if(o == null) { + return null; + } else { + return Double.parseDouble(o.toString()); + } + } + + /** + * Helper function to get a String from the Annotation. This function will simply call <code>toString()</code> on the + * object saved in the Annotation or return null if the object is null. + */ + public String getString(String key) { + Object o = getMap().get(key); + if(o == null) { + return null; + } else { + return o.toString(); + } + } + + /** + * Helper function to get an Integer annotation. 
+ * <p> + * This function first checks if the Object in a map is a <code>Number</code>, in which case intValue() is called on it. + * If it is not, then Integer.parseInt() is called on the string representation of the object. If the string + * is not parseable as an integer, a NumberFormatException is thrown. If there is no value for the key, null is returned. + */ + public Integer getInteger(String key) { + Object o = getMap().get(key); + if(o == null) { + return null; + } else if(o instanceof Number) { + return ((Number)o).intValue(); + } else { + return Integer.parseInt(o.toString()); + } + } + + /** + * Helper function to get a Boolean annotation. Returns null if the value is missing or is not a Boolean. + */ + public Boolean getBoolean(String key) { + Object o = getMap().get(key); + if(o == null || !(o instanceof Boolean)) { + return null; + } else { + return (Boolean) o; + } + } + + +} diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java b/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java new file mode 100644 index 00000000000..d80ff80f172 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java @@ -0,0 +1,404 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text.interpretation; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * + * An interpretation of a text. + * + * This class is the main class to use when querying and modifying annotations for a text. + * + * The interpretation consists of a tree of annotations, with the nodes in the tree being Spans. An annotation + * is defined by its annotationClass ("person"), and by a key/value map of + * parameters for that annotationClass (if the person is an actor or other notable person). + * + * This class is the main class for querying and setting annotations, where modifying the span tree + * is not needed. 
+ * + * @see Span + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Interpretation { + + private Modification modification; + private double probability; + private Span rootSpan; + + public final static AnnotationClass INTERPRETATION_CLASS = new AnnotationClass("interpretation"); + + + /** + * Creates a new interpretation and a new modification from the text, + * with the probability set to the default value (0.0). + + */ + public Interpretation(String text) { + this(text,0.0); + } + + /** + * Creates a new interpretation and a new modification from the text, with the given probability. + */ + public Interpretation(String text, double probabilty) { + this(new Modification(text),probabilty); + } + + + /** + * Creates a new interpretation based on the modification, with the probability set to the default value (0.0). + */ + public Interpretation(Modification modification) { + this(modification,0.0); + } + + /** + * Creates an interpretation based on the given modification, with a new root span over its text and the given probability. + */ + public Interpretation(Modification modification,double probability) { + this.modification = modification; + rootSpan = new Span(modification); + setProbability(probability); + } + + + public Modification getModification() { + return modification; + } + + + /** + * The probability that this interpretation is correct. + * @return a value between 0.0 and 1.0 that gives the probability that this interpretation is correct + */ + public double getProbability() { + return probability; + } + + /** + * Sets the probability that this interpretation is correct. Values outside the valid range are clamped: + * negative values are stored as 0.0 and values larger than 1.0 are stored as 1.0. + * + * The value is used when sorting interpretations. 
+ */ + public void setProbability(double probability) { // clamps the value into [0.0, 1.0] before storing it + if (probability < 0) { + probability = 0.0; + } else if (probability > 1.0) { + probability = 1.0; + } + this.probability = probability; + + } + + /** Returns the root of the tree representation of the interpretation */ + public Span root() { return rootSpan; } + + + // Wrapper methods for Span + + /** + * Returns the annotation of the given class on the root span, creating it if necessary. + * @param annotationClass The class of the annotation + * + */ + public Annotations annotate(String annotationClass) { + return annotate(new AnnotationClass(annotationClass)); + } + + /** + * Returns the annotation of the given class on the root span, creating it if necessary. + * @param annotationClass The class of the annotation + * + */ + public Annotations annotate(AnnotationClass annotationClass) { + return rootSpan.annotate(annotationClass); + } + + /** + * Sets a key/value pair for an annotation. If an annotation of the class does not + * exist, a new one is created. + * + * A shortcut for annotate(annotationClass).put(key,value) + * @param annotationClass class of the annotation + * @param key key of the property to set on the annotation + * @param value value of the property to set on the annotation + */ + public void annotate(String annotationClass, String key, Object value) { + annotate(new AnnotationClass(annotationClass)).put(key,value); + } + + /** + * Sets a key/value pair for an annotation. If an annotation of the class does not + * exist, a new one is created. 
+ * + * A shortcut for annotate(annotationClass).put(key,value) + * @param annotationClass class of the annotation + * @param key key of the property to set on the annotation + * @param value value of the property to set on the annotation + */ + public void annotate(AnnotationClass annotationClass, String key, Object value) { + annotate(annotationClass).put(key,value); + } + + /** + * Returns the annotation of the given class for the substring (from,to), creating it if necessary. + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + */ + public Annotations annotate(int from, int to, String annotationClass) { + return annotate(from,to,new AnnotationClass(annotationClass)); + } + + /** + * Returns the annotation of the given class for the substring (from,to), creating it if necessary. + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + */ + public Annotations annotate(int from, int to, AnnotationClass annotationClass) { + return rootSpan.annotate(from,to,annotationClass); + } + + /** + * Sets a key/value pair for an annotation of a substring. If an annotation of the class + * does not exist, a new one is created. + * + * A shortcut for annotate(from, to, annotationClass).put(key, value) + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + * @param key key of property to set on annotation + * @param value value of property to set on annotation + */ + public void annotate(int from, int to, String annotationClass, String key, Object value) { + annotate(from, to,new AnnotationClass(annotationClass)).put(key, value); + } + + /** + * Sets a key/value pair for an annotation of a substring. If an annotation of the class + * does not exist, a new one is created. 
+ * + * A shortcut for annotate(from, to, annotationClass).put(key, value) + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + * @param key key of property to set on annotation + * @param value value of property to set on annotation + */ + public void annotate(int from, int to, AnnotationClass annotationClass, String key, Object value) { + annotate(from, to, annotationClass).put(key, value); + } + + /** + * Gets all annotations in the span tree, grouped by annotation class. This will also return all subannotations, even those that + * override their parents. + */ + public Map<AnnotationClass,List<Annotations>> getAll() { + return rootSpan.getAllAnnotations(); + } + + /** + * Returns a list of all annotations of the given class that exist in the text. This will also return + * all subannotations, even those that override their parents. + * If there are none, an empty list is returned, never null. The returned list should not be modified. + */ + public List<Annotations> getAll(String annotationClass) { + return getAll(new AnnotationClass(annotationClass)); + } + + /** + * Returns a list of all annotations of the given class that exist in the text. This will also return + * all subannotations, even those that override their parent. + * If there are none, an empty list is returned, never null. The returned list should not be modified. + */ + public List<Annotations> getAll(AnnotationClass annotationClass) { + // TODO: This implementation is very inefficient because it unnecessarily collects for all classes + Map<AnnotationClass,List<Annotations>> all = getAll(); + if(all.containsKey(annotationClass)){ + return all.get(annotationClass); + } else { + return Collections.emptyList(); + } + } + + /** + * Returns the annotation marked with the annotationClass. + * + * This is different from annotate(annotationClass) because a new annotation + * will not be created if it does not exist. 
+ * + * @param annotationClass class of the annotation + * @return an annotation with the given class, null if it does not exist + */ + public Annotations get(String annotationClass) { + return get(new AnnotationClass(annotationClass)); + } + + /** + * Returns the annotation marked with the annotationClass. + * + * This is different from annotate(annotationClass) because a new annotation + * will not be created if it does not exist. + * + * @param annotationClass class of the annotation + * @return an annotation with the given class, null if it does not exist + */ + public Annotations get(AnnotationClass annotationClass) { + return rootSpan.getAnnotation(annotationClass); + } + + /** + * Gets the value of a property set on an annotation. + * If the annotation or the key/value pair does not exist, null + * is returned. + */ + public Object get(String annotationClass,String key) { + return get(new AnnotationClass(annotationClass),key); + } + + /** + * Gets the value of a property set on an annotation. + * If the annotation or the key/value pair does not exist, null + * is returned. + */ + public Object get(AnnotationClass annotationClass,String key) { + Annotations annotations = get(annotationClass); + if(annotations != null) { + return annotations.get(key); + } else { + return null; + } + } + + /** + * Equivalent to <code>get(from,to,new AnnotationClass(annotationClass))</code> + */ + public Annotations get(int from, int to, String annotationClass) { + return get(from,to,new AnnotationClass(annotationClass)); + } + + /** + * Gets an annotation that is set on a substring. + * + * This function first tries to find an annotation of annotationClass that + * describes the range (from,to). If that does not exist, it tries to find the smallest range + * which both contains (from,to) and has an annotation of annotationClass. + * If that does not exist, null is returned. 
+ * + * For example, if these annotations have been set for the text "new york city": + * i.annotate(0,3,"token") //new + * i.annotate(4,8,"token") //york + * i.annotate(9,13,"token") //city + * i.annotate(0,8,"city") //new york + * i.annotate(0,13,"city") //new york city + * + * then: + * + * i.get(0,3,"token") //returns "token" - annotation for "new" + * i.get(0,3,"city") //returns "city" - annotation for "new york" + * i.get(9,13,"city") //returns "city" - annotation for "new york city" + * + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + * @return the annotation found, or null if there is none + */ + public Annotations get(int from, int to, AnnotationClass annotationClass ) { + return rootSpan.getAnnotation(from,to,annotationClass); + } + + /** + * Get the value of a property set on a substring annotation. + * + * If the annotation or the key/value pair does not exist, null + * is returned. + * + */ + public Object get(int from,int to,String annotationClass,String key) { + Annotations annotations = get(from,to,annotationClass); + if(annotations != null) { + return annotations.get(key); + } else { + return null; + } + } + + /** + * Gets all the annotation classes that describe the text. + + */ + public Set<AnnotationClass> getClasses() { + return rootSpan.getClasses(); + } + + /** + * Gets all annotation classes that describe a substring. + */ + public Set<AnnotationClass> getClasses(int from,int to) { + return rootSpan.getClasses(from,to); + } + + + /** + * Gets the lowermost spans (usually the spans marked with token). + */ + public List<Span> getTokens() { + return rootSpan.getTokens(); + } + + /** + * Returns all spans whose text equals the given term, ignoring case. If no span with that term exists, + * the empty list is returned. 
+ */ + public List<Span> getTermSpans(String term) { + return rootSpan.getTermSpans(term); + } + + public @Override String toString() { // lists each annotation class with the quoted text and key/value pairs of its annotations + StringBuilder sb = new StringBuilder(); + Map<AnnotationClass, List<Annotations>> annotations = getAll(); + Iterator<Map.Entry<AnnotationClass,List<Annotations>>> mapIterator = annotations.entrySet().iterator(); + while (mapIterator.hasNext()) { + Map.Entry<AnnotationClass, List<Annotations>> entry = mapIterator.next(); + Iterator<Annotations> annoIterator = entry.getValue().iterator(); + sb.append(entry.getKey()).append(" : ["); // NOTE(review): AnnotationClass does not override toString() in this change - confirm the intended output + + while (annoIterator.hasNext()) { + Annotations annotation = annoIterator.next(); + sb.append("\"").append(annotation.getSubString()).append("\""); + dumpAnnotation(sb, annotation); + if(annoIterator.hasNext()) { + sb.append(","); + } + } + sb.append("]"); + if(mapIterator.hasNext()) { + sb.append(", "); + } + } + sb.append(")"); // NOTE(review): no matching "(" is appended above - confirm whether this is intended + return sb.toString(); + } + + private void dumpAnnotation(StringBuilder sb, Annotations annotations) { // appends " : {key : value, ...}" if the annotation has any key/value pairs + if (annotations.getMap().size() > 0) { + sb.append(" : {"); + Iterator<Map.Entry<String,Object>> valueIterator = annotations.getMap().entrySet().iterator(); + while(valueIterator.hasNext()) { + Map.Entry<String,Object> value = valueIterator.next(); + sb.append(value.getKey()).append(" : ").append(value.getValue()); + if(valueIterator.hasNext()) { + sb.append(", "); + } + } + sb.append("}"); + + } + } +} diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java b/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java new file mode 100644 index 00000000000..ab92b6de961 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text.interpretation; + +import java.util.HashMap; + +/** + * A modification of a text. 
+ * + * This class represents a possible rewrite of an original text. A rewrite may be due to possible + * spelling errors in the text or to query expansion. + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Modification extends HashMap<String,Object>{ + + /** + * Serialization version identifier. + */ + private static final long serialVersionUID = -8522335044460396296L; + + + public final static AnnotationClass MODIFICATION_CLASS = new AnnotationClass("modification"); + + + private String text; + private Annotations annotations; + + public Modification(String text) { + this.text = text; + Span span = new Span(this); // a root span over the whole text, created only to hold the annotation below + this.annotations = span.annotate(MODIFICATION_CLASS); // the "modification" annotation returned by getAnnotation() + } + + public String getText() { + return text; + } + + public Annotations getAnnotation() { + return annotations; + } + +} + diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Span.java b/container-search/src/main/java/com/yahoo/text/interpretation/Span.java new file mode 100644 index 00000000000..39457a0fc99 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/Span.java @@ -0,0 +1,349 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text.interpretation; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + + +/** + * Span is a description of a part of a text, modeled as a tree. + * + * A span is defined by the range (from,to) and by a set of annotations on that range. 
It also contains a set + * of child nodes that all have the restriction + * {@code child.from >= parent.from && child.to <= parent.to && (child.to-child.from) < (parent.to-parent.from)}. + * This means that all spans on a text can be modeled as a tree, where every child span is guaranteed to be contained + * inside its parent span. + * <p> + * A span will usually be used indirectly through Interpretation. + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Span { + + private final Modification modification; + private List<Span> subSpans = null; //Lazy because of a large number of leaf nodes + private final Map<AnnotationClass, Annotations> annotations = new HashMap<>(); + private Span parent; //Yes, this _should_ be final, but might be changed when adding an annotation + private final int from; + private final int to; + + + /** + * Creates a new root span based on the modification, covering its whole text + */ + Span(final Modification modification) { + this.modification = modification; + this.parent = null; + this.from = 0; + this.to = modification.getText().length(); + } + + //This constructor is private to ensure that all child spans of a span are contained inside it. 
+ private Span(int from, int to, Span parent) { + this.parent = parent; + this.modification = parent.modification; + this.from = from; + this.to = to; + } + + + + /** + * Returns the text that this span covers + */ + public String getText() { + return modification.getText().substring(from, to); + } + + + public String toString() { + return "SPAN: " + getText(); + } + + + public Annotations annotate(AnnotationClass clazz) { // returns this span's annotation of the given class, creating it if absent + Annotations annotations = this.annotations.get(clazz); + if (!this.annotations.containsKey(clazz)) { + annotations = new Annotations(this); + this.annotations.put(clazz, annotations); + } + return annotations; + } + + /** + * This will either create or get the annotation of the given class for the range (from,to) + */ + public Annotations annotate(int from, int to, AnnotationClass clazz) { + return addAnnotation(from, to, clazz); + } + + + /** + * Returns all annotations that are contained in either this span or in any of its subspans, grouped by annotation class + */ + public Map<AnnotationClass, List<Annotations>> getAllAnnotations() { + Map<AnnotationClass, List<Annotations>> result = new HashMap<>(); + getAllAnnotations(result); + return result; + } + + /** + * Returns all spans, either this or any of its subspans, whose text equals the given term, ignoring case + */ + public List<Span> getTermSpans(String term) { + List<Span> spans = new ArrayList<>(); + getTermSpans(term, spans); + return spans; + } + + /** + * Returns the annotation with a specific class for the area defined by this span + * <p> + * + * This function will query its parent to find any annotation that is set for an area that this span is contained + * in. If there are conflicts (several annotations defined with the same annotation class), the annotation + * that is defined for the smallest area (furthest down in the tree), is used. 
+ */ + public Annotations getAnnotation(AnnotationClass clazz) { + return getAnnotation(from, to, clazz); + } + + /** + * Returns the annotation with a specific class for the area defined by (from,to), or null if there is none. + * + * This function will query its parent to find any annotation that is set for an area that this span is contained + * in. If there are conflicts (several annotations defined with the same annotation class), the annotation + * that is defined for the smallest area (furthest down in the tree), is used. + * + * @throws RuntimeException if (from,to) is not contained in the span + */ + public Annotations getAnnotation(int from, int to, AnnotationClass clazz) { + if(from < this.from || to > this.to) { + throw new RuntimeException("Trying to get a range that is outside this span"); + } + if (this.parent != null) { // delegate upwards so that the search always starts at the root span + return parent.getAnnotation(from, to, clazz); + } else { + return getBestAnnotation(from, to, clazz ); + + } + } + + /** + * Returns all AnnotationClasses that are defined for this span and any of its superspans. + */ + public Set<AnnotationClass> getClasses() { + return getClasses(from, to); + + } + + /** + * Returns all AnnotationClasses that are defined on spans containing the range (from,to). 
+ * + * @throws RuntimeException if (from,to) is not contained in the span + */ + public Set<AnnotationClass> getClasses(int from, int to) { + if(from < this.from || to > this.to) { + throw new RuntimeException("Trying to get a range that is outside this span"); + } + if (this.parent != null) { + return parent.getClasses(from, to); + } else { + HashSet<AnnotationClass> classes = new HashSet<>(); + getAnnotationClasses(from, to, classes); + return classes; + } + } + + + + /** + * Returns an unmodifiable list of all leaf spans at or below this span (this span itself if it has no subspans) + */ + public List<Span> getTokens() { + List<Span> spans = new ArrayList<>(); + getTokens(spans); + return Collections.unmodifiableList(spans); + } + + /** + * Returns true if the given class is among the classes returned by getClasses() + */ + public boolean hasClass(AnnotationClass clazz) { + return getClasses().contains(clazz); + } + + /** + * Returns all spans that are direct children of this span. If the span is a leaf, the empty + * list will be returned. The list is unmodifiable. + */ + public List<Span> getSubSpans() { + return subSpans == null ? + Collections.<Span>emptyList() : + Collections.unmodifiableList(subSpans); + } + + /** Returns the start index of this span in the text (hack) */ + public int getFrom() { return from; } + /** Returns the end index of this span in the text, as passed to substring (hack) */ + public int getTo() { return to; } + + //Needed by addAnnotation: returns the modifiable subspan list itself (or an empty list if there is none) + private List<Span> getRemovableSubSpan() { + return subSpans == null ? + Collections.<Span>emptyList() : + subSpans; + } + + + private void addSubSpan(Span span) { + if(subSpans == null) { + subSpans = new ArrayList<>(); + } + subSpans.add(span); + } + + + /* + * How this works: + * + * First we check if any existing subannotation can contain this annotation. If so, we leave it to them to add + * the new annotation. + * + * Then we check if the new annotation intersects any of the existing annotations. That is illegal to do. + * + * We then add all subannotations that are strictly contained in the new annotation to the new annotation. 
+ */ + private Annotations addAnnotation(int from, int to, AnnotationClass clazz) { + if (equalsRange(from, to)) { + //The range is exactly this span: get or create the annotation here + if (annotations.containsKey(clazz)) { + return annotations.get(clazz); + } else { + Annotations nAnnotations = new Annotations(this); + annotations.put(clazz,nAnnotations); + return nAnnotations; + } + } + + //Reject ranges that partially overlap a child; delegate to a child that contains the whole range + for (Span subSpan : getSubSpans()) { + if (subSpan.intersects(from, to)) { + throw new RuntimeException("Trying to add span that intersects already excisting span"); + } else if (subSpan.contains(from, to)) { + return subSpan.addAnnotation(from, to, clazz); + } + } + + //We now know that we have to add the new span to this span + Span span = new Span(from, to, this); + Annotations nAnnotations = new Annotations(span); + span.annotations.put(clazz,nAnnotations); + addSubSpan(span); + + + //We then move any existing subspan that is inside the new span under it (the list iterated here also holds the new span, added last) + Iterator<Span> subIterator = getRemovableSubSpan().iterator(); + + while (subIterator.hasNext()) { + Span subSpan = subIterator.next(); + if (subSpan.contains(from, to)) { + return subSpan.addAnnotation(from, to, clazz); + } else if (subSpan.isInside(from, to)) { + //Take over the subspan: re-parent it under the new span + subSpan.parent = span; + span.addSubSpan(subSpan); + subIterator.remove(); + } + } + return nAnnotations; + } + + + private boolean contains(int from, int to) { // true if this span covers the whole range (from,to) + return this.from <= from && this.to >= to; + } + + private boolean isInside(int from, int to) { // true if this span lies within the range (from,to) + return this.from >= from && this.to <= to; + } + + + private boolean intersects(int from, int to) { // true if this span partially overlaps (from,to), with neither containing the other + return (this.from < from && this.to > from && this.to < to) + || (this.from < to && this.to > to && this.from > from); + + + } + + private boolean equalsRange(int from, int to) { + return this.from == from && this.to == to; + } + + private void getAllAnnotations(Map<AnnotationClass, List<Annotations>> results) { + for(Map.Entry<AnnotationClass, 
Annotations> entry : annotations.entrySet()) { + List<Annotations> anList = results.get(entry.getKey()); + if (anList == null) { + anList = new ArrayList<>(); + results.put(entry.getKey(), anList); + } + anList.add(entry.getValue()); + } + + for(Span subSpan : getSubSpans()) { + subSpan.getAllAnnotations(results); + } + } + + private void getTermSpans(String term, List<Span> spans) { // collects this span and every subspan whose text equals the term, ignoring case + if(term.equalsIgnoreCase(this.getText())) { + spans.add(this); + } + for(Span subSpan : getSubSpans()) { + subSpan.getTermSpans(term, spans); + } + } + + + private void getAnnotationClasses(int from, int to, Set<AnnotationClass> classes) { // collects the classes of every span in this subtree that contains (from,to) + if (!contains(from, to)) { + return; + } + classes.addAll(annotations.keySet()); + for (Span subSpan : getSubSpans()) { + subSpan.getAnnotationClasses(from, to, classes); + } + } + + private void getTokens(List<Span> spans) { // collects the leaf spans of this subtree + if (getSubSpans().size() == 0) { + spans.add(this); + } else { + for (Span subSpan : getSubSpans()) { + subSpan.getTokens(spans); + } + + } + } + + private Annotations getBestAnnotation(int from, int to, AnnotationClass clazz) { + if (!contains(from, to)) { + return null; + } + //Start with this span's own annotation; one found in a subspan that contains the range replaces it + Annotations annotations = this.annotations.get(clazz); + for (Span subSpan : getSubSpans()) { + Annotations subAnnotations = subSpan.getBestAnnotation(from, to, clazz); + if (subAnnotations != null) { + annotations = subAnnotations; + } + } + return annotations; + } +} diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java b/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java new file mode 100644 index 00000000000..902dc58d551 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Models a set of hierarchical annotations (typically produced by QLAS) of a natural language string. + */ +@ExportPackage +@PublicApi +package com.yahoo.text.interpretation; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; |