summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Arne H Juul <arnej27959@users.noreply.github.com> 2021-07-13 14:44:05 +0200
committer GitHub <noreply@github.com> 2021-07-13 14:44:05 +0200
commit 3f5a585788a52f4d2fdfa6686c5af777a06ee8ce (patch)
tree 68fee3462aa4f392de96e8bcfecd74c4e56074c1
parent 378279222376141546357dc913f58637ac83f158 (diff)
parent 589923e168d45c76a230227c1a9dfebd2f5b6990 (diff)
Merge pull request #18599 from vespa-engine/arnej/retry-normalize
try to trap spurious failure
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeExpression.java 30
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeTestCase.java 35
2 files changed, 65 insertions, 0 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeExpression.java
index 9c6e698a154..78a7a858aba 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeExpression.java
@@ -6,12 +6,16 @@ import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Transformer;
+import java.util.logging.Logger;
+import java.util.logging.Level;
+
/**
* @author Simon Thoresen Hult
*/
public final class NormalizeExpression extends Expression {
private final Linguistics linguistics;
+ private static final Logger logger = Logger.getLogger(NormalizeExpression.class.getName());
public NormalizeExpression(Linguistics linguistics) {
super(DataType.STRING);
@@ -22,9 +26,35 @@ public final class NormalizeExpression extends Expression {
return linguistics;
}
+
+ private static String escape(String str) { // returns a copy of str in which chars below U+0020 are spelled out, for use in the log message
+ StringBuilder buf = new StringBuilder();
+ for (char c : str.toCharArray()) {
+ if (c >= ' ') { // space and everything above it (including U+007F and non-ASCII) is copied unchanged
+ buf.append(c);
+ } else {
+ buf.append(String.format("U+%04X", (int)c)); // e.g. backspace becomes "U+0008"; tab and newline are escaped too
+ }
+ }
+ return buf.toString();
+ }
+
@Override
protected void doExecute(ExecutionContext context) { // replaces the context value with its accent-dropped form; on IllegalArgumentException logs details and tries once more
Transformer transformer = linguistics.getTransformer();
+ var orig = String.valueOf(context.getValue()); // kept in locals so the exact input and output can be reported on failure
+ var lang = context.resolveLanguage(linguistics);
+ var transformed = transformer.accentDrop(orig, lang);
+ try {
+ context.setValue(new StringFieldValue(transformed));
+ return; // normal path: the first result was accepted
+ } catch (IllegalArgumentException ex) { // thrown while constructing or setting the field value
+ String msg = ("bad normalize, \n" +
+ "original: >>> " + escape(orig) + " <<<\n" +
+ " -> accentDrop(" + lang + ") -> \n" +
+ "transformed: >>> " + escape(transformed) + " <<<");
+ logger.log(Level.SEVERE, msg, ex); // pass ex along so the rejection reason and stack trace are logged, not dropped
+ }
context.setValue(new StringFieldValue(transformer.accentDrop(String.valueOf(context.getValue()), // reached only after a logged failure: recompute and set again; an exception here is not caught
context.resolveLanguage(linguistics))));
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeTestCase.java
index 8b4f1a8b344..3ad1b129b4d 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/NormalizeTestCase.java
@@ -6,8 +6,10 @@ import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.language.Language;
import com.yahoo.language.Linguistics;
+import com.yahoo.language.process.Transformer;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.SimpleTestAdapter;
+
import org.junit.Test;
import org.mockito.Mockito;
@@ -56,4 +58,37 @@ public class NormalizeTestCase {
assertTrue(val instanceof StringFieldValue);
assertEquals("beyonce", ((StringFieldValue)val).getString());
}
+
+ class MyMockTransformer implements Transformer { // test double whose first accentDrop call returns a different result than later calls
+ boolean first = true; // true until accentDrop has been called once
+ @Override
+ public String accentDrop(String input, Language language) { // language is ignored
+ if (first) {
+ first = false;
+ return input.replace(' ', '\u0008'); // first call: every space becomes the backspace control character (U+0008)
+ } else {
+ return input.replace(' ', '/'); // every later call: every space becomes '/'
+ }
+ }
+ }
+
+ class MyMockLinguistics extends SimpleLinguistics { // SimpleLinguistics variant that hands out the stateful mock transformer
+ private Transformer transformer = new MyMockTransformer(); // created once per instance, so its first-call state survives repeated getTransformer() calls
+ @Override
+ public Transformer getTransformer() {
+ return transformer;
+ }
+ }
+
+ @Test
+ public void requireThatBadNormalizeRetries() { // expects the value left in the context to be the mock's second-call result
+ ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
+ ctx.setLanguage(Language.ENGLISH);
+ ctx.setValue(new StringFieldValue("bad norm")); // contains one space for the mock transformer to replace
+ var linguistics = new MyMockLinguistics();
+ new NormalizeExpression(linguistics).execute(ctx); // NOTE(review): presumably the first result (with U+0008) is rejected with IllegalArgumentException, triggering the retry; the rejecting code is not visible here
+ FieldValue val = ctx.getValue();
+ assertTrue(val instanceof StringFieldValue);
+ assertEquals("bad/norm", ((StringFieldValue)val).getString()); // '/' is what the mock produces on any call after its first
+ }
}