blob: b94cfb03b2e44354e1090d70f9b9acefbb8be468 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage.expressions;
import com.yahoo.document.DataType;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Transformer;
import java.util.logging.Logger;
import java.util.logging.Level;
/**
 * Indexing expression that replaces the current value of the execution context with the result of
 * running its string form through {@code accentDrop} of the configured linguistics' transformer.
 *
 * @author Simon Thoresen Hult
 */
public final class NormalizeExpression extends Expression {

    private static final Logger logger = Logger.getLogger(NormalizeExpression.class.getName());

    private final Linguistics linguistics;

    /**
     * Creates a normalize expression.
     *
     * @param linguistics the linguistics whose transformer performs the normalization
     */
    public NormalizeExpression(Linguistics linguistics) {
        super(DataType.STRING); // NOTE(review): presumably the required input type - confirm against Expression
        this.linguistics = linguistics;
    }

    /** Returns the linguistics instance this expression was constructed with. */
    public Linguistics getLinguistics() {
        return linguistics;
    }

    /**
     * Makes a string safe to print in a diagnostic message: every character below U+0020 is
     * replaced by its code point written as {@code U+XXXX}; all other characters are kept as is.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    private static String escape(String str) {
        StringBuilder buf = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c < ' ') {
                buf.append(String.format("U+%04X", (int) c));
            } else {
                buf.append(c);
            }
        }
        return buf.toString();
    }

    /**
     * Normalizes the current value of the context. An empty value is left untouched; otherwise the
     * string form of the value is passed through {@code accentDrop} for the resolved language and
     * the result is stored back as a {@link StringFieldValue}.
     *
     * @param context the execution context holding the value to normalize
     * @throws IllegalArgumentException if creating or assigning the transformed string value is
     *         rejected; the original and transformed text are logged at SEVERE level first
     */
    @Override
    protected void doExecute(ExecutionContext context) {
        Transformer transformer = linguistics.getTransformer();
        String orig = String.valueOf(context.getValue());
        if (orig.isEmpty()) {
            return; // must be a no-op for all linguistics/language combinations
        }
        var lang = context.resolveLanguage(linguistics);
        String transformed = transformer.accentDrop(orig, lang);
        try {
            context.setValue(new StringFieldValue(transformed));
        } catch (IllegalArgumentException ex) {
            String msg = ("bad normalize, \n" +
                          "original: >>> " + escape(orig) + " <<<\n" +
                          " -> accentDrop(" + lang + ") -> \n" +
                          "transformed: >>> " + escape(transformed) + " <<<");
            logger.log(Level.SEVERE, msg, ex);
            // Propagate the failure we already have instead of repeating the whole
            // transform-and-assign sequence just to make it fail a second time.
            throw ex;
        }
    }

    /**
     * Declares the output of this expression to the verification context.
     *
     * @param context the verification context whose value type is set to {@link #createdOutputType()}
     */
    @Override
    protected void doVerify(VerificationContext context) {
        context.setValueType(createdOutputType());
    }

    /** Returns {@link DataType#STRING}, the type of the value this expression produces. */
    @Override
    public DataType createdOutputType() {
        return DataType.STRING;
    }

    /** Returns the fixed string {@code "normalize"}. */
    @Override
    public String toString() {
        return "normalize";
    }

    /**
     * Two normalize expressions are equal when their linguistics instances are of exactly the same
     * class; the linguistics instances themselves are not compared.
     */
    @Override
    public boolean equals(Object o) {
        return o instanceof NormalizeExpression other
               && linguistics.getClass() == other.linguistics.getClass();
    }

    /** Returns the same hash for every instance of this class, which is consistent with {@link #equals}. */
    @Override
    public int hashCode() {
        return getClass().hashCode();
    }

}
|