diff options
6 files changed, 164 insertions, 40 deletions
diff --git a/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java b/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java index 201ddcb3908..1e6065c63b9 100644 --- a/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java +++ b/cloud-tenant-cd/src/main/java/ai/vespa/hosted/cd/cloud/impl/VespaTestRuntime.java @@ -43,7 +43,7 @@ public class VespaTestRuntime implements TestRuntime { DefaultEndpointAuthenticator authenticator = new DefaultEndpointAuthenticator(config.system()); this.deploymentToTest = new HttpDeployment(config.deployments().get(config.zone()), authenticator); FeedClientBuilder.setEndpointAuthenticator(authenticator); - System.setProperty(ai.vespa.feed.client.FeedClientBuilder.PREFERRED_IMPLEMENTATION_PROPERTY, FeedClientBuilder.class.getName()); + ai.vespa.feed.client.FeedClientBuilder.setFeedClientBuilderSupplier(FeedClientBuilder::new); } @Override diff --git a/cloud-tenant-cd/src/main/resources/META-INF/services/ai.vespa.hosted.cd.TestRuntime b/cloud-tenant-cd/src/main/resources/META-INF/services/ai.vespa.hosted.cd.TestRuntime deleted file mode 100644 index 9d318b87fca..00000000000 --- a/cloud-tenant-cd/src/main/resources/META-INF/services/ai.vespa.hosted.cd.TestRuntime +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -ai.vespa.hosted.cd.cloud.impl.VespaTestRuntime
\ No newline at end of file diff --git a/juniper/src/vespa/juniper/sumdesc.cpp b/juniper/src/vespa/juniper/sumdesc.cpp index 969616423e8..fcee1eb605f 100644 --- a/juniper/src/vespa/juniper/sumdesc.cpp +++ b/juniper/src/vespa/juniper/sumdesc.cpp @@ -29,6 +29,10 @@ char printable_char(char c) return c; } +constexpr ucs4_t il_ann_anchor = 0xfff9; +constexpr ucs4_t il_ann_separator = 0xfffa; +constexpr ucs4_t il_ann_terminator = 0xfffb; + bool wordchar(const unsigned char* s) { unsigned char c = *s; @@ -40,6 +44,28 @@ bool wordchar(const unsigned char* s) } } +bool wordchar_or_il_ann_char(const unsigned char* s, ucs4_t annotation_char) +{ + unsigned char c = *s; + if (c & 0x80) { + ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s); + return Fast_UnicodeUtil::IsWordChar(u) || + u == annotation_char; + } else { + return isalnum(c); + } +} + +bool wordchar_or_il_ann_anchor(const unsigned char* s) +{ + return wordchar_or_il_ann_char(s, il_ann_anchor); +} + +bool wordchar_or_il_ann_terminator(const unsigned char* s) +{ + return wordchar_or_il_ann_char(s, il_ann_terminator); +} + bool nonwordchar(const unsigned char* s) { unsigned char c = *s; @@ -51,6 +77,35 @@ bool nonwordchar(const unsigned char* s) } } +bool +il_ann_char(const unsigned char* s, ucs4_t annotation_char) +{ + unsigned char c = *s; + if (c & 0x80) { + ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s); + return u == annotation_char; + } else { + return false; + } +} + +bool +il_ann_anchor_char(const unsigned char* s) +{ + return il_ann_char(s, il_ann_anchor); +} + +bool +il_ann_separator_char(const unsigned char* s) +{ + return il_ann_char(s, il_ann_separator); +} + +bool +il_ann_terminator_char(const unsigned char* s) +{ + return il_ann_char(s, il_ann_terminator); +} /* Move backwards/forwards from ptr (no longer than to start) in an * UTF8 text until the beginning of the word or (if space, until @@ -75,12 +130,24 @@ int complete_word(unsigned char* start, ssize_t length, } // Figure out if a word needs completion or if we are just going - // to eliminate whitespace + // to eliminate whitespace. Consider sequence from interlinear + // annotation anchor to interlinear annotation terminator to be a + // word. if (!wordchar(ptr)) { - whitespace_elim = true; - // Change direction of scan - increment = -increment; - chartest = wordchar; + if (increment > 0 && il_ann_anchor_char(ptr)) { + chartest = il_ann_terminator_char; + } else if (increment < 0 && il_ann_terminator_char(ptr)) { + chartest = il_ann_anchor_char; + } else { + whitespace_elim = true; + // Change direction of scan + increment = -increment; + if (increment > 0) { + chartest = wordchar_or_il_ann_anchor; + } else { + chartest = wordchar_or_il_ann_terminator; + } + } } else { // Found a wordchar at pointer // If moving forwards, we need to check the previous character @@ -89,12 +156,16 @@ int complete_word(unsigned char* start, ssize_t length, const unsigned char* pre_ptr = ptr; int cur_move = Fast_UnicodeUtil::UTF8move(start, length, pre_ptr, -1); - if (!wordchar(pre_ptr)) // Points at start of new word + if (!wordchar(pre_ptr) && !il_ann_terminator_char(pre_ptr)) // Points at start of new word { whitespace_elim = true; // Change direction of scan increment = -increment; - chartest = wordchar; + if (increment > 0) { + chartest = wordchar_or_il_ann_anchor; + } else { + chartest = wordchar_or_il_ann_terminator; + } ptr = pre_ptr; moved += cur_move; } else { @@ -129,6 +200,34 @@ int complete_word(unsigned char* start, ssize_t length, break; } if (chartest(ptr)) { + if (chartest == nonwordchar) { + if (il_ann_separator_char(ptr)) { + if (increment > 0) { + chartest = il_ann_terminator_char; + } else { + chartest = il_ann_anchor_char; + } + moved += cur_move; + continue; + } else if (il_ann_terminator_char(ptr)) { + if (increment < 0) { + chartest = il_ann_anchor_char; + } + moved += cur_move; + continue; + } else if (il_ann_anchor_char(ptr)) { + if (increment > 0) { + chartest = il_ann_terminator_char; + } + moved += cur_move; + continue; + } + } else if ((chartest == il_ann_anchor_char) || + (chartest == il_ann_terminator_char)) { + chartest = nonwordchar; + moved += cur_move; + continue; + } LOG(spam, "complete_word: Breaking at char %c/0x%x (%d)", printable_char(*ptr), *ptr, cur_move); // count this character (it is the first blank/wordchar) @@ -141,7 +240,9 @@ int complete_word(unsigned char* start, ssize_t length, break; // Found first blank/word char.. } moved += cur_move; - if (moved >= MAX_SCAN_WORD) { + if (moved >= MAX_SCAN_WORD && + (chartest != il_ann_anchor_char) && + (chartest != il_ann_terminator_char)) { LOG(spam, "Word length extended max word length %d, " "breaking at char 0x%x", MAX_SCAN_WORD, *ptr); break; @@ -486,8 +587,11 @@ int SummaryDesc::complete_extended_token(unsigned char* start, ssize_t length, // Only a single connector character that connects word // characters should lead us to include more words in the // normal sense: - if (!wordchar(preptr)) + if (!wordchar(preptr) && + !(increment > 0 && il_ann_anchor_char(preptr)) && + !(increment < 0 && il_ann_terminator_char(preptr))) { return moved; + } // If a block of chinese data does not contain any spaces we have to return // here in order to avoid searching all the way to the start/end. diff --git a/vespa-feed-client-api/abi-spec.json b/vespa-feed-client-api/abi-spec.json index 8af7798984f..16e532a2c9a 100644 --- a/vespa-feed-client-api/abi-spec.json +++ b/vespa-feed-client-api/abi-spec.json @@ -123,6 +123,7 @@ "methods": [ "public static ai.vespa.feed.client.FeedClientBuilder create(java.net.URI)", "public static ai.vespa.feed.client.FeedClientBuilder create(java.util.List)", + "public static void setFeedClientBuilderSupplier(java.util.function.Supplier)", "public abstract ai.vespa.feed.client.FeedClientBuilder setConnectionsPerEndpoint(int)", "public abstract ai.vespa.feed.client.FeedClientBuilder setMaxStreamPerConnection(int)", "public abstract ai.vespa.feed.client.FeedClientBuilder setSslContext(javax.net.ssl.SSLContext)", diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java index 05bc608df27..95c9b2c95fe 100644 --- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java +++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/FeedClientBuilder.java @@ -3,18 +3,13 @@ package ai.vespa.feed.client; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.SSLContext; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; import java.net.URI; import java.nio.file.Path; import java.security.PrivateKey; import java.security.cert.X509Certificate; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Iterator; import java.util.List; -import java.util.ServiceLoader; import java.util.function.Supplier; /** @@ -32,31 +27,13 @@ public interface FeedClientBuilder { /** Creates a builder for multiple container endpoints **/ static FeedClientBuilder create(List<URI> endpoints) { - String defaultImplementation = "ai.vespa.feed.client.impl.FeedClientBuilderImpl"; - String preferredImplementation = System.getProperty(PREFERRED_IMPLEMENTATION_PROPERTY, defaultImplementation); - Iterator<FeedClientBuilder> iterator = ServiceLoader.load(FeedClientBuilder.class).iterator(); - if (iterator.hasNext()) { - List<FeedClientBuilder> builders = new ArrayList<>(); - iterator.forEachRemaining(builders::add); - return builders.stream() - .filter(builder -> preferredImplementation.equals(builder.getClass().getName())) - .findFirst() - .orElse(builders.get(0)); - } else { - try { - Class<?> aClass = Class.forName(preferredImplementation); - for (Constructor<?> constructor : aClass.getConstructors()) { - if (constructor.getParameterTypes().length==0) { - return ((FeedClientBuilder)constructor.newInstance()).setEndpointUris(endpoints); - } - } - throw new RuntimeException("Could not find Feed client builder implementation"); - } catch (ClassNotFoundException | InvocationTargetException | InstantiationException | IllegalAccessException e) { - throw new RuntimeException(e); - } - } + return Helper.getFeedClientBuilderSupplier().get().setEndpointUris(endpoints); } + /** Override FeedClientBuilder. This will be preferred in {@link #create} */ + static void setFeedClientBuilderSupplier(Supplier<FeedClientBuilder> supplier) { + Helper.setFeedClientBuilderSupplier(supplier); + } /** * Sets the number of connections this client will use per endpoint. * diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java index 59c12077bef..6971b2ea8f5 100644 --- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java +++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/Helper.java @@ -1,18 +1,62 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package ai.vespa.feed.client; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java.util.Objects; +import java.util.ServiceLoader; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; import java.util.stream.Collectors; +import static ai.vespa.feed.client.FeedClientBuilder.PREFERRED_IMPLEMENTATION_PROPERTY; + /** * @author bjorncs */ class Helper { + private static final AtomicReference<Supplier<FeedClientBuilder>> feedClientBuilderSupplier = new AtomicReference<>(Helper::getFeedClientBuilder); + + static final void setFeedClientBuilderSupplier(Supplier<FeedClientBuilder> supplier) { + feedClientBuilderSupplier.set(supplier); + } + + static Supplier<FeedClientBuilder> getFeedClientBuilderSupplier() { + return feedClientBuilderSupplier.get(); + } + + static FeedClientBuilder getFeedClientBuilder() { + String defaultImplementation = "ai.vespa.feed.client.impl.FeedClientBuilderImpl"; + String preferredImplementation = System.getProperty(PREFERRED_IMPLEMENTATION_PROPERTY, defaultImplementation); + Iterator<FeedClientBuilder> iterator = ServiceLoader.load(FeedClientBuilder.class).iterator(); + if (iterator.hasNext()) { + List<FeedClientBuilder> builders = new ArrayList<>(); + iterator.forEachRemaining(builders::add); + return builders.stream() + .filter(builder -> preferredImplementation.equals(builder.getClass().getName())) + .findFirst() + .orElse(builders.get(0)); + } else { + try { + Class<?> aClass = Class.forName(preferredImplementation); + for (Constructor<?> constructor : aClass.getConstructors()) { + if (constructor.getParameterTypes().length == 0) { + return ((FeedClientBuilder) constructor.newInstance()); + } + } + throw new RuntimeException("Could not find Feed client builder implementation"); + } catch (ClassNotFoundException | InvocationTargetException | InstantiationException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + } + private Helper() {} @SafeVarargs |