// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.document.annotation;
import com.yahoo.document.ArrayDataType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.Field;
import com.yahoo.document.StructDataType;
import com.yahoo.document.datatypes.Array;
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.datatypes.Struct;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
/**
* Contains code snippets that are used in the documentation. Not really a test case.
*
* @author Einar M R Rosenvinge
*/
public class DocTestCase {
private class Processing {
private Service getService() {
return null;
}
}
private class Service {
private DocumentTypeManager getDocumentTypeManager() {
return null;
}
}
/** Placeholder processing context for the snippets; it is never assigned in this class and therefore stays {@code null}. */
private Processing processing;
@Test
public void testSimple1() {
StringFieldValue text = new StringFieldValue("
DiaryI live in San Francisco");
//012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
SpanList root = new SpanList();
root.add(new Span(0, 19))
.add(new Span(19, 5))
.add(new Span(24, 21))
.add(new Span(45, 23))
.add(new Span(68, 14));
SpanTree tree = new SpanTree("html", root);
text.setSpanTree(tree);
}
/**
 * Snippet 2: same five spans as the first snippet, but each span is additionally
 * annotated as either "markup" or "text".
 */
public void simple2() {
    //the following line works inside process(Document, Arguments, Processing) in a DocumentProcessor
    AnnotationTypeRegistry atr = processing.getService().getDocumentTypeManager().getAnnotationTypeRegistry();
    // The literal carries the full markup; the span offsets below index into it.
    StringFieldValue text = new StringFieldValue("<html><head><title>Diary</title></head><body>I live in San Francisco</body></html>");
    //                                            012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
    AnnotationType textType = atr.getType("text");
    AnnotationType markup = atr.getType("markup");
    SpanList root = new SpanList();
    SpanTree tree = new SpanTree("html", root);

    Span span1 = new Span(0, 19);    // <html><head><title>
    root.add(span1);
    tree.annotate(span1, markup);

    Span span2 = new Span(19, 5);    // Diary
    root.add(span2);
    tree.annotate(span2, textType);

    Span span3 = new Span(24, 21);   // </title></head><body>
    root.add(span3);
    tree.annotate(span3, markup);

    Span span4 = new Span(45, 23);   // I live in San Francisco
    root.add(span4);
    tree.annotate(span4, textType);

    Span span5 = new Span(68, 14);   // </body></html>
    root.add(span5);
    tree.annotate(span5, markup);

    text.setSpanTree(tree);
}
/**
 * Snippet 3: builds a nested span tree (root containing a header list and a body list)
 * and annotates every tag as "begintag"/"endtag", the text runs as "text", and the two
 * sub-lists as "header" and "body".
 */
public void simple3() {
    //the following line works inside process(Document, Arguments, Processing) in a DocumentProcessor
    AnnotationTypeRegistry atr = processing.getService().getDocumentTypeManager().getAnnotationTypeRegistry();
    // The literal carries the full markup; the span offsets below index into it.
    StringFieldValue text = new StringFieldValue("<html><head><title>Diary</title></head><body>I live in San Francisco</body></html>");
    //                                            012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
    SpanList root = new SpanList();
    SpanTree tree = new SpanTree("html", root);
    AnnotationType textType = atr.getType("text");
    AnnotationType beginTag = atr.getType("begintag");
    AnnotationType endTag = atr.getType("endtag");
    AnnotationType bodyType = atr.getType("body");
    AnnotationType headerType = atr.getType("header");

    SpanList header = new SpanList();
    {
        Span span1 = new Span(6, 6);     // <head>
        Span span2 = new Span(12, 7);    // <title>
        Span span3 = new Span(19, 5);    // Diary
        Span span4 = new Span(24, 8);    // </title>
        Span span5 = new Span(32, 7);    // </head>
        header.add(span1)
              .add(span2)
              .add(span3)
              .add(span4)
              .add(span5);
        tree.annotate(span1, beginTag)
            .annotate(span2, beginTag)
            .annotate(span3, textType)
            .annotate(span4, endTag)
            .annotate(span5, endTag)
            .annotate(header, headerType);
    }

    SpanList body = new SpanList();
    {
        Span span1 = new Span(39, 6);    // <body>
        Span span2 = new Span(45, 23);   // I live in San Francisco
        Span span3 = new Span(68, 7);    // </body>
        body.add(span1)
            .add(span2)
            .add(span3);
        tree.annotate(span1, beginTag)
            .annotate(span2, textType)
            .annotate(span3, endTag)
            .annotate(body, bodyType);
    }

    {
        Span span1 = new Span(0, 6);     // <html>
        Span span2 = new Span(75, 7);    // </html>
        root.add(span1)
            .add(header)
            .add(body)
            .add(span2);
        tree.annotate(span1, beginTag)
            .annotate(span2, endTag);
    }

    text.setSpanTree(tree);
}
/**
 * Snippet 4: like the nested tree of the previous snippet, but the body text is split
 * into two spans so that "San Francisco" can carry a "city" annotation whose value is a
 * struct with latitude and longitude.
 */
public void simple4() {
    //the following line works inside process(Document, Arguments, Processing) in a DocumentProcessor
    AnnotationTypeRegistry atr = processing.getService().getDocumentTypeManager().getAnnotationTypeRegistry();
    // The literal carries the full markup; the span offsets below index into it.
    StringFieldValue text = new StringFieldValue("<html><head><title>Diary</title></head><body>I live in San Francisco</body></html>");
    //                                            012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
    SpanList root = new SpanList();
    SpanTree tree = new SpanTree("html", root);
    AnnotationType textType = atr.getType("text");
    AnnotationType beginTag = atr.getType("begintag");
    AnnotationType endTag = atr.getType("endtag");
    AnnotationType bodyType = atr.getType("body");
    AnnotationType headerType = atr.getType("header");
    AnnotationType cityType = atr.getType("city");

    // Value carried by the city annotation.
    Struct position = (Struct) cityType.getDataType().createFieldValue();
    position.setFieldValue("latitude", 37.774929);
    position.setFieldValue("longitude", -122.419415);
    Annotation city = new Annotation(cityType, position);

    SpanList header = new SpanList();
    {
        Span span1 = new Span(6, 6);     // <head>
        Span span2 = new Span(12, 7);    // <title>
        Span span3 = new Span(19, 5);    // Diary
        Span span4 = new Span(24, 8);    // </title>
        Span span5 = new Span(32, 7);    // </head>
        header.add(span1)
              .add(span2)
              .add(span3)
              .add(span4)
              .add(span5);
        // Both closing tags get an endtag annotation: span4 and span5 (not span4 twice).
        tree.annotate(span1, beginTag)
            .annotate(span2, beginTag)
            .annotate(span3, textType)
            .annotate(span4, endTag)
            .annotate(span5, endTag)
            .annotate(header, headerType);
    }

    SpanList textNode = new SpanList();
    {
        Span span1 = new Span(45, 10);   // "I live in "
        Span span2 = new Span(55, 13);   // "San Francisco"
        textNode.add(span1)
                .add(span2);
        tree.annotate(span2, city)
            .annotate(textNode, textType);
    }

    SpanList body = new SpanList();
    {
        Span span1 = new Span(39, 6);    // <body>
        Span span2 = new Span(68, 7);    // </body>
        body.add(span1)
            .add(textNode)
            .add(span2);
        tree.annotate(span1, beginTag)
            .annotate(span2, endTag)
            .annotate(body, bodyType);
    }

    {
        Span span1 = new Span(0, 6);     // <html>
        Span span2 = new Span(75, 7);    // </html>
        root.add(span1)
            .add(header)
            .add(body)
            .add(span2);
        tree.annotate(span1, beginTag)
            .annotate(span2, endTag);
    }

    text.setSpanTree(tree);
}
public void simple5() {
//the following two lines work inside process(Document, Arguments, Processing) in a DocumentProcessor
DocumentTypeManager dtm = processing.getService().getDocumentTypeManager();
AnnotationTypeRegistry atr = dtm.getAnnotationTypeRegistry();
StringFieldValue text = new StringFieldValue("I live in San
Francisco");
//012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
SpanList root = new SpanList();
SpanTree tree = new SpanTree("html", root);
var docType = dtm.getDocumentType("blogpost");
StructDataType positionType = docType.getDeclaredStructType("position");
AnnotationType textType = atr.getType("text");
AnnotationType beginTag = atr.getType("begintag");
AnnotationType endTag = atr.getType("endtag");
AnnotationType bodyType = atr.getType("body");
AnnotationType paragraphType = atr.getType("paragraph");
AnnotationType cityType = atr.getType("city");
Struct position = new Struct(positionType);
position.setFieldValue("latitude", 37.774929);
position.setFieldValue("longitude", -122.419415);
Annotation sanAnnotation = new Annotation(textType);
Annotation franciscoAnnotation = new Annotation(textType);
Struct positionWithRef = (Struct) cityType.getDataType().createFieldValue();
positionWithRef.setFieldValue("position", position);
Field referencesField = ((StructDataType) cityType.getDataType()).getField("references");
Array refList = new Array(referencesField.getDataType());
AnnotationReferenceDataType annRefType = (AnnotationReferenceDataType) ((ArrayDataType) referencesField.getDataType()).getNestedType();
refList.add(new AnnotationReference(annRefType, sanAnnotation));
refList.add(new AnnotationReference(annRefType, franciscoAnnotation));
positionWithRef.setFieldValue(referencesField, refList);
Annotation city = new Annotation(cityType, positionWithRef);
SpanList paragraph = new SpanList();
{
Span span1 = new Span(6, 3);
Span span2 = new Span(9, 10);
Span span3 = new Span(19, 4);
Span span4 = new Span(23, 4);
paragraph.add(span1)
.add(span2)
.add(span3)
.add(span4);
tree.annotate(span1, beginTag)
.annotate(span2, textType)
.annotate(span3, sanAnnotation)
.annotate(span4, endTag)
.annotate(paragraph, paragraphType);
}
{
Span span1 = new Span(0, 6);
Span span2 = new Span(27, 9);
Span span3 = new Span(36, 8);
root.add(span1)
.add(paragraph)
.add(span2)
.add(span3);
tree.annotate(span1, beginTag)
.annotate(span2, franciscoAnnotation)
.annotate(span3, endTag)
.annotate(root, bodyType)
.annotate(city);
}
text.setSpanTree(tree);
}
/**
 * Snippet 6: walks the direct children of the "html" tree's root and, for every child
 * that carried a "markup" annotation, replaces that child with the finer-grained tag
 * spans produced by {@code analyzeMarkup}.
 */
public void simple6() {
    StringFieldValue text = new StringFieldValue("<html><head><title>Diary</title></head><body>I live in San Francisco</body></html>");
    //                                            012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
    SpanTree tree = text.getSpanTree("html");
    SpanList root = (SpanList) tree.getRoot();
    //TODO: Note that the above could have been a Span or an AlternateSpanList!

    // Typed iterator: the loop reads SpanNode elements and edits the child list in place.
    ListIterator<SpanNode> nodeIt = root.childIterator();
    AnnotationType beginTag = new AnnotationType("begintag");
    AnnotationType endTag = new AnnotationType("endtag");

    while (nodeIt.hasNext()) {
        SpanNode node = nodeIt.next();
        boolean nodeHadMarkupAnnotation = removeMarkupAnnotation(tree, node);
        if (nodeHadMarkupAnnotation) {
            // Drop the coarse markup node, then insert its replacements at the same position.
            nodeIt.remove();
            List<Span> replacementNodes = analyzeMarkup(tree, node, text, beginTag, endTag);
            for (SpanNode repl : replacementNodes) {
                nodeIt.add(repl);
            }
        }
    }
}
/**
 * Removes the first annotation of type 'markup' found on the given node.
 *
 * @param tree the tree to remove annotations from
 * @param node the node to remove annotations of type 'markup' from
 * @return true if the given node had a 'markup' annotation (which is then removed), false otherwise
 */
private boolean removeMarkupAnnotation(SpanTree tree, SpanNode node) {
    //get iterator over all annotations on this node:
    Iterator<Annotation> annotationIt = tree.iterator(node);
    while (annotationIt.hasNext()) {
        Annotation annotation = annotationIt.next();
        if (annotation.getType().getName().equals("markup")) {
            //this node has an annotation of type markup, remove it and report that:
            annotationIt.remove();
            return true;
        }
    }
    //this node did not have a markup annotation:
    return false;
}
/**
 * NOTE: This method is provided only for completeness. It analyzes spans annotated with
 * "markup", and splits them into several shorter spans annotated with "begintag"
 * and "endtag". Each new span covers one tag, from its '&lt;' through its '&gt;'; a tag is
 * treated as an end tag when the character right after '&lt;' is '/'.
 *
 * @param tree       the span tree to annotate into
 * @param input      a SpanNode that is annotated with "markup".
 * @param text       the text that the SpanNode covers
 * @param beginTag   the type to use for begintag annotations
 * @param endTagType the type to use for endtag annotations
 * @return a list of new spans to replace the input
 */
private List<Span> analyzeMarkup(SpanTree tree, SpanNode input, StringFieldValue text,
                                 AnnotationType beginTag, AnnotationType endTagType) {
    //we know that this node is annotated with "markup"
    String coveredText = input.getText(text.getString()).toString();
    int spanOffset = input.getFrom();   // new spans are placed at this offset plus the index within coveredText
    int tagStart = -1;                  // index of the most recent unclosed '<', or -1 when outside a tag
    boolean isEndTag = false;
    List<Span> tags = new ArrayList<>();

    for (int i = 0; i < coveredText.length(); i++) {
        char c = coveredText.charAt(i);
        if (c == '<') {
            //we're in a tag; decide its kind right away so that an empty tag "<>"
            //cannot inherit the flag from the previous tag
            tagStart = i;
            isEndTag = i + 1 < coveredText.length() && coveredText.charAt(i + 1) == '/';
        } else if (c == '>' && tagStart > -1) {
            Span span = new Span(spanOffset + tagStart, (i + 1) - tagStart);
            tags.add(span);
            tree.annotate(span, isEndTag ? endTagType : beginTag);
            tagStart = -1;
        }
    }
    return tags;
}
/**
 * Snippet 7: iterates over all annotations of the "html" span tree and removes every
 * annotation whose type is named "markup".
 */
public void simple7() {
    StringFieldValue text = new StringFieldValue("<html><head><title>Diary</title></head><body>I live in San Francisco</body></html>");
    //                                            012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
    SpanTree tree = text.getSpanTree("html");
    Iterator<Annotation> annotationIt = tree.iterator();
    while (annotationIt.hasNext()) {
        Annotation annotation = annotationIt.next();
        if (annotation.getType().getName().equals("markup")) {
            //we have an annotation of type markup, remove it:
            annotationIt.remove();
        }
    }
}
}