summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java2
-rw-r--r--cloud-tenant-cd/src/main/resources/META-INF/services/ai.vespa.hosted.cd.TestRuntime2
-rw-r--r--juniper/src/vespa/juniper/sumdesc.cpp122
-rw-r--r--vespa-feed-client-api/abi-spec.json1
-rw-r--r--vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java33
-rw-r--r--vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java44
6 files changed, 164 insertions, 40 deletions
diff --git a/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java b/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java
index 201ddcb3908..1e6065c63b9 100644
--- a/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java
+++ b/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java
@@ -43,7 +43,7 @@ public class VespaTestRuntime implements TestRuntime {
DefaultEndpointAuthenticator authenticator = new DefaultEndpointAuthenticator(config.system());
this.deploymentToTest = new HttpDeployment(config.deployments().get(config.zone()), authenticator);
FeedClientBuilder.setEndpointAuthenticator(authenticator);
- System.setProperty(ai.vespa.feed.client.FeedClientBuilder.PREFERRED_IMPLEMENTATION_PROPERTY, FeedClientBuilder.class.getName());
+ ai.vespa.feed.client.FeedClientBuilder.setFeedClientBuilderSupplier(FeedClientBuilder::new);
}
@Override
diff --git a/cloud-tenant-cd/src/main/resources/META-INF/services/ai.vespa.hosted.cd.TestRuntime b/cloud-tenant-cd/src/main/resources/META-INF/services/ai.vespa.hosted.cd.TestRuntime
deleted file mode 100644
index 9d318b87fca..00000000000
--- a/cloud-tenant-cd/src/main/resources/META-INF/services/ai.vespa.hosted.cd.TestRuntime
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-ai.vespa.hosted.cd.cloud.impl.VespaTestRuntime
\ No newline at end of file
diff --git a/juniper/src/vespa/juniper/sumdesc.cpp b/juniper/src/vespa/juniper/sumdesc.cpp
index 969616423e8..fcee1eb605f 100644
--- a/juniper/src/vespa/juniper/sumdesc.cpp
+++ b/juniper/src/vespa/juniper/sumdesc.cpp
@@ -29,6 +29,10 @@ char printable_char(char c)
return c;
}
+// Code points (U+FFF9, U+FFFA, U+FFFB) that decoded UTF-8 characters are
+// compared against to recognise an interlinear annotation anchor,
+// separator and terminator respectively.
+constexpr ucs4_t il_ann_anchor = 0xfff9;
+constexpr ucs4_t il_ann_separator = 0xfffa;
+constexpr ucs4_t il_ann_terminator = 0xfffb;
+
bool wordchar(const unsigned char* s)
{
unsigned char c = *s;
@@ -40,6 +44,28 @@ bool wordchar(const unsigned char* s)
}
}
+// Returns true if the character starting at s is a word character or is
+// equal to annotation_char.
+// A lead byte with the high bit set is decoded as UTF-8 and accepted when
+// Fast_UnicodeUtil::IsWordChar() holds or the decoded value equals
+// annotation_char; any other byte is classified with isalnum() only.
+bool wordchar_or_il_ann_char(const unsigned char* s, ucs4_t annotation_char)
+{
+ unsigned char c = *s;
+ if (c & 0x80) {
+ ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
+ return Fast_UnicodeUtil::IsWordChar(u) ||
+ u == annotation_char;
+ } else {
+ return isalnum(c);
+ }
+}
+
+// Single-argument character test: true for a word character or the
+// interlinear annotation anchor (il_ann_anchor).
+bool wordchar_or_il_ann_anchor(const unsigned char* s)
+{
+ return wordchar_or_il_ann_char(s, il_ann_anchor);
+}
+
+// Single-argument character test: true for a word character or the
+// interlinear annotation terminator (il_ann_terminator).
+bool wordchar_or_il_ann_terminator(const unsigned char* s)
+{
+ return wordchar_or_il_ann_char(s, il_ann_terminator);
+}
+
bool nonwordchar(const unsigned char* s)
{
unsigned char c = *s;
@@ -51,6 +77,35 @@ bool nonwordchar(const unsigned char* s)
}
}
+// Returns true only if the character starting at s decodes (as UTF-8) to
+// exactly annotation_char. Bytes without the high bit set are never a
+// match, so they are rejected without decoding.
+bool
+il_ann_char(const unsigned char* s, ucs4_t annotation_char)
+{
+ unsigned char c = *s;
+ if (c & 0x80) {
+ ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
+ return u == annotation_char;
+ } else {
+ return false;
+ }
+}
+
+// Single-argument character test: true if the character at s is the
+// interlinear annotation anchor (il_ann_anchor).
+bool
+il_ann_anchor_char(const unsigned char* s)
+{
+ return il_ann_char(s, il_ann_anchor);
+}
+
+// Single-argument character test: true if the character at s is the
+// interlinear annotation separator (il_ann_separator).
+bool
+il_ann_separator_char(const unsigned char* s)
+{
+ return il_ann_char(s, il_ann_separator);
+}
+
+// Single-argument character test: true if the character at s is the
+// interlinear annotation terminator (il_ann_terminator).
+bool
+il_ann_terminator_char(const unsigned char* s)
+{
+ return il_ann_char(s, il_ann_terminator);
+}
/* Move backwards/forwards from ptr (no longer than to start) in an
* UTF8 text until the beginning of the word or (if space, until
@@ -75,12 +130,24 @@ int complete_word(unsigned char* start, ssize_t length,
}
// Figure out if a word needs completion or if we are just going
- // to eliminate whitespace
+ // to eliminate whitespace. Consider sequence from interlinear
+ // annotation anchor to interlinear annotation terminator to be a
+ // word.
if (!wordchar(ptr)) {
- whitespace_elim = true;
- // Change direction of scan
- increment = -increment;
- chartest = wordchar;
+ if (increment > 0 && il_ann_anchor_char(ptr)) {
+ chartest = il_ann_terminator_char;
+ } else if (increment < 0 && il_ann_terminator_char(ptr)) {
+ chartest = il_ann_anchor_char;
+ } else {
+ whitespace_elim = true;
+ // Change direction of scan
+ increment = -increment;
+ if (increment > 0) {
+ chartest = wordchar_or_il_ann_anchor;
+ } else {
+ chartest = wordchar_or_il_ann_terminator;
+ }
+ }
} else {
// Found a wordchar at pointer
// If moving forwards, we need to check the previous character
@@ -89,12 +156,16 @@ int complete_word(unsigned char* start, ssize_t length,
const unsigned char* pre_ptr = ptr;
int cur_move = Fast_UnicodeUtil::UTF8move(start, length,
pre_ptr, -1);
- if (!wordchar(pre_ptr)) // Points at start of new word
+ if (!wordchar(pre_ptr) && !il_ann_terminator_char(pre_ptr)) // Points at start of new word
{
whitespace_elim = true;
// Change direction of scan
increment = -increment;
- chartest = wordchar;
+ if (increment > 0) {
+ chartest = wordchar_or_il_ann_anchor;
+ } else {
+ chartest = wordchar_or_il_ann_terminator;
+ }
ptr = pre_ptr;
moved += cur_move;
} else {
@@ -129,6 +200,34 @@ int complete_word(unsigned char* start, ssize_t length,
break;
}
if (chartest(ptr)) {
+ if (chartest == nonwordchar) {
+ if (il_ann_separator_char(ptr)) {
+ if (increment > 0) {
+ chartest = il_ann_terminator_char;
+ } else {
+ chartest = il_ann_anchor_char;
+ }
+ moved += cur_move;
+ continue;
+ } else if (il_ann_terminator_char(ptr)) {
+ if (increment < 0) {
+ chartest = il_ann_anchor_char;
+ }
+ moved += cur_move;
+ continue;
+ } else if (il_ann_anchor_char(ptr)) {
+ if (increment > 0) {
+ chartest = il_ann_terminator_char;
+ }
+ moved += cur_move;
+ continue;
+ }
+ } else if ((chartest == il_ann_anchor_char) ||
+ (chartest == il_ann_terminator_char)) {
+ chartest = nonwordchar;
+ moved += cur_move;
+ continue;
+ }
LOG(spam, "complete_word: Breaking at char %c/0x%x (%d)", printable_char(*ptr),
*ptr, cur_move);
// count this character (it is the first blank/wordchar)
@@ -141,7 +240,9 @@ int complete_word(unsigned char* start, ssize_t length,
break; // Found first blank/word char..
}
moved += cur_move;
- if (moved >= MAX_SCAN_WORD) {
+ if (moved >= MAX_SCAN_WORD &&
+ (chartest != il_ann_anchor_char) &&
+ (chartest != il_ann_terminator_char)) {
LOG(spam, "Word length extended max word length %d, "
"breaking at char 0x%x", MAX_SCAN_WORD, *ptr);
break;
@@ -486,8 +587,11 @@ int SummaryDesc::complete_extended_token(unsigned char* start, ssize_t length,
// Only a single connector character that connects word
// characters should lead us to include more words in the
// normal sense:
- if (!wordchar(preptr))
+ if (!wordchar(preptr) &&
+ !(increment > 0 && il_ann_anchor_char(preptr)) &&
+ !(increment < 0 && il_ann_terminator_char(preptr))) {
return moved;
+ }
// If a block of chinese data does not contain any spaces we have to return
// here in order to avoid searching all the way to the start/end.
diff --git a/vespa-feed-client-api/abi-spec.json b/vespa-feed-client-api/abi-spec.json
index 8af7798984f..16e532a2c9a 100644
--- a/vespa-feed-client-api/abi-spec.json
+++ b/vespa-feed-client-api/abi-spec.json
@@ -123,6 +123,7 @@
"methods": [
"public static ai.vespa.feed.client.FeedClientBuilder create(java.net.URI)",
"public static ai.vespa.feed.client.FeedClientBuilder create(java.util.List)",
+ "public static void setFeedClientBuilderSupplier(java.util.function.Supplier)",
"public abstract ai.vespa.feed.client.FeedClientBuilder setConnectionsPerEndpoint(int)",
"public abstract ai.vespa.feed.client.FeedClientBuilder setMaxStreamPerConnection(int)",
"public abstract ai.vespa.feed.client.FeedClientBuilder setSslContext(javax.net.ssl.SSLContext)",
diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java
index 05bc608df27..95c9b2c95fe 100644
--- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java
+++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java
@@ -3,18 +3,13 @@ package ai.vespa.feed.client;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.SSLContext;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.nio.file.Path;
import java.security.PrivateKey;
import java.security.cert.X509Certificate;
-import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
-import java.util.Iterator;
import java.util.List;
-import java.util.ServiceLoader;
import java.util.function.Supplier;
/**
@@ -32,31 +27,13 @@ public interface FeedClientBuilder {
/** Creates a builder for multiple container endpoints **/
static FeedClientBuilder create(List<URI> endpoints) {
- String defaultImplementation = "ai.vespa.feed.client.impl.FeedClientBuilderImpl";
- String preferredImplementation = System.getProperty(PREFERRED_IMPLEMENTATION_PROPERTY, defaultImplementation);
- Iterator<FeedClientBuilder> iterator = ServiceLoader.load(FeedClientBuilder.class).iterator();
- if (iterator.hasNext()) {
- List<FeedClientBuilder> builders = new ArrayList<>();
- iterator.forEachRemaining(builders::add);
- return builders.stream()
- .filter(builder -> preferredImplementation.equals(builder.getClass().getName()))
- .findFirst()
- .orElse(builders.get(0));
- } else {
- try {
- Class<?> aClass = Class.forName(preferredImplementation);
- for (Constructor<?> constructor : aClass.getConstructors()) {
- if (constructor.getParameterTypes().length==0) {
- return ((FeedClientBuilder)constructor.newInstance()).setEndpointUris(endpoints);
- }
- }
- throw new RuntimeException("Could not find Feed client builder implementation");
- } catch (ClassNotFoundException | InvocationTargetException | InstantiationException | IllegalAccessException e) {
- throw new RuntimeException(e);
- }
- }
+ return Helper.getFeedClientBuilderSupplier().get().setEndpointUris(endpoints);
}
+ /**
+ * Overrides how {@link FeedClientBuilder} instances are obtained. The supplier is stored
+ * via {@code Helper} and is the one consulted by {@link #create(List)} on subsequent calls.
+ *
+ * @param supplier supplies the builder instance that {@code create} then configures with endpoints
+ */
+ static void setFeedClientBuilderSupplier(Supplier<FeedClientBuilder> supplier) {
+ Helper.setFeedClientBuilderSupplier(supplier);
+ }
/**
* Sets the number of connections this client will use per endpoint.
*
diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java
index 59c12077bef..6971b2ea8f5 100644
--- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java
+++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java
@@ -1,18 +1,62 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package ai.vespa.feed.client;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import java.util.Objects;
+import java.util.ServiceLoader;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
import java.util.stream.Collectors;
+import static ai.vespa.feed.client.FeedClientBuilder.PREFERRED_IMPLEMENTATION_PROPERTY;
+
/**
* @author bjorncs
*/
class Helper {
+ /** Currently active source of builder instances; initially {@link #getFeedClientBuilder()}. */
+ private static final AtomicReference<Supplier<FeedClientBuilder>> feedClientBuilderSupplier = new AtomicReference<>(Helper::getFeedClientBuilder);
+
+ /**
+ * Replaces the supplier returned by {@link #getFeedClientBuilderSupplier()}.
+ *
+ * @param supplier the new source of builder instances; must not be null
+ * @throws NullPointerException if {@code supplier} is null
+ */
+ static final void setFeedClientBuilderSupplier(Supplier<FeedClientBuilder> supplier) {
+ // Fail fast: a stored null would otherwise only surface as an NPE on a later FeedClientBuilder.create call.
+ feedClientBuilderSupplier.set(Objects.requireNonNull(supplier, "supplier"));
+ }
+
+ /** Returns the supplier currently held in {@code feedClientBuilderSupplier}. */
+ static Supplier<FeedClientBuilder> getFeedClientBuilderSupplier() {
+ return feedClientBuilderSupplier.get();
+ }
+
+ /**
+ * Default builder lookup, used as the initial value of {@code feedClientBuilderSupplier}.
+ * <p>
+ * The preferred implementation class name is read from the system property
+ * {@code PREFERRED_IMPLEMENTATION_PROPERTY}, falling back to
+ * {@code ai.vespa.feed.client.impl.FeedClientBuilderImpl}. Implementations found through
+ * {@link ServiceLoader} take precedence; only when none are found is the preferred class
+ * instantiated reflectively.
+ *
+ * @return a builder instance; no endpoints are set on it here
+ * @throws RuntimeException if the preferred class has no public no-argument constructor,
+ * or if loading or instantiating it fails with one of the caught reflection exceptions
+ */
+ static FeedClientBuilder getFeedClientBuilder() {
+ String defaultImplementation = "ai.vespa.feed.client.impl.FeedClientBuilderImpl";
+ String preferredImplementation = System.getProperty(PREFERRED_IMPLEMENTATION_PROPERTY, defaultImplementation);
+ Iterator<FeedClientBuilder> iterator = ServiceLoader.load(FeedClientBuilder.class).iterator();
+ if (iterator.hasNext()) {
+ // At least one service-loaded implementation: pick the preferred one by class name,
+ // otherwise fall back to the first one discovered.
+ List<FeedClientBuilder> builders = new ArrayList<>();
+ iterator.forEachRemaining(builders::add);
+ return builders.stream()
+ .filter(builder -> preferredImplementation.equals(builder.getClass().getName()))
+ .findFirst()
+ .orElse(builders.get(0));
+ } else {
+ // Nothing registered with ServiceLoader: load the preferred class by name and
+ // invoke its public no-argument constructor.
+ try {
+ Class<?> aClass = Class.forName(preferredImplementation);
+ for (Constructor<?> constructor : aClass.getConstructors()) {
+ if (constructor.getParameterTypes().length == 0) {
+ return ((FeedClientBuilder) constructor.newInstance());
+ }
+ }
+ throw new RuntimeException("Could not find Feed client builder implementation");
+ } catch (ClassNotFoundException | InvocationTargetException | InstantiationException | IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
private Helper() {}
@SafeVarargs