diff options
87 files changed, 1881 insertions, 961 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java index 1620c90acd1..d7e9e0da081 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java @@ -56,6 +56,9 @@ public class Index implements Cloneable, Serializable { /** The boolean index definition, if set */ private BooleanIndexDefinition boolIndex; + // TODO: Remove when experimental posting list format is made default + private boolean experimentalPostingListFormat = false; + public Index(String name) { this(name, false); } @@ -181,4 +184,12 @@ public class Index implements Cloneable, Serializable { boolIndex = def; } + public void setExperimentalPostingListFormat(boolean value) { + experimentalPostingListFormat = value; + } + + public boolean useExperimentalPostingListFormat() { + return experimentalPostingListFormat; + } + } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java index f8766afbc7b..6f6e97a0876 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java @@ -113,7 +113,8 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { .datatype(IndexschemaConfig.Indexfield.Datatype.Enum.valueOf(f.getType())) .prefix(f.hasPrefix()) .phrases(f.hasPhrases()) - .positions(f.hasPositions()); + .positions(f.hasPositions()) + .experimentalpostinglistformat(f.useExperimentalPostingListFormat()); if (!f.getCollectionType().equals("SINGLE")) { ifB.collectiontype(IndexschemaConfig.Indexfield.Collectiontype.Enum.valueOf(f.getCollectionType())); } @@ -174,6 +175,8 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { private boolean phrases = false; 
// TODO dead, but keep a while to ensure config compatibility? private boolean positions = true;// TODO dead, but keep a while to ensure config compatibility? private BooleanIndexDefinition boolIndex = null; + // TODO: Remove when experimental posting list format is made default + private boolean experimentalPostingListFormat = false; public IndexField(String name, Index.Type type, DataType sdFieldType) { this.name = name; @@ -183,6 +186,7 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { public void setIndexSettings(com.yahoo.searchdefinition.Index index) { if (type.equals(Index.Type.TEXT)) { prefix = index.isPrefix(); + experimentalPostingListFormat = index.useExperimentalPostingListFormat(); } sdType = index.getType(); boolIndex = index.getBooleanIndexDefiniton(); @@ -205,6 +209,7 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { public boolean hasPrefix() { return prefix; } public boolean hasPhrases() { return phrases; } public boolean hasPositions() { return positions; } + public boolean useExperimentalPostingListFormat() { return experimentalPostingListFormat; } public BooleanIndexDefinition getBooleanIndexDefinition() { return boolIndex; diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java index 6df4ca2a6e1..459bb247e5f 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java @@ -29,6 +29,8 @@ public class IndexOperation implements FieldOperation { private OptionalLong lowerBound = OptionalLong.empty(); private OptionalLong upperBound = OptionalLong.empty(); private OptionalDouble densePostingListThreshold = OptionalDouble.empty(); + // TODO: Remove when experimental posting list format is made default + private 
Optional<Boolean> experimentalPostingListFormat = Optional.empty(); public String getIndexName() { return indexName; @@ -87,6 +89,9 @@ public class IndexOperation implements FieldOperation { index.setBooleanIndexDefiniton( new BooleanIndexDefinition(arity, lowerBound, upperBound, densePostingListThreshold)); } + if (experimentalPostingListFormat.isPresent()) { + index.setExperimentalPostingListFormat(experimentalPostingListFormat.get()); + } } public Type getType() { @@ -112,5 +117,8 @@ public class IndexOperation implements FieldOperation { public void setDensePostingListThreshold(double densePostingListThreshold) { this.densePostingListThreshold = OptionalDouble.of(densePostingListThreshold); } + public void setExperimentalPostingListFormat(boolean value) { + experimentalPostingListFormat = Optional.of(value); + } } diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj index de5146ec7d2..571ad452b01 100644 --- a/config-model/src/main/javacc/SDParser.jj +++ b/config-model/src/main/javacc/SDParser.jj @@ -331,6 +331,7 @@ TOKEN : | < LOWERBOUND: "lower-bound" > | < UPPERBOUND: "upper-bound" > | < DENSEPOSTINGLISTTHRESHOLD: "dense-posting-list-threshold" > +| < EXPERIMENTALPOSTINGLISTFORMAT: "experimental-posting-list-format" > | < SUMMARYFEATURES_SL: "summary-features" (" ")* ":" (~["}","\n"])* ("\n")? > | < SUMMARYFEATURES_ML: "summary-features" (<SEARCHLIB_SKIP>)? "{" (~["}"])* "}" > | < RANKFEATURES_SL: "rank-features" (" ")* ":" (~["}","\n"])* ("\n")? 
> @@ -1781,6 +1782,7 @@ Object indexBody(IndexOperation index) : | <LOWERBOUND> <COLON> num = consumeLong() { index.setLowerBound(num); } | <UPPERBOUND> <COLON> num = consumeLong() { index.setUpperBound(num); } | <DENSEPOSTINGLISTTHRESHOLD> <COLON> threshold = consumeFloat() { index.setDensePostingListThreshold(threshold); } + | <EXPERIMENTALPOSTINGLISTFORMAT> { index.setExperimentalPostingListFormat(true); } ) { return null; } } diff --git a/config-model/src/main/perl/vespa-deploy b/config-model/src/main/perl/vespa-deploy index 22181858d70..ffde937bea0 100755 --- a/config-model/src/main/perl/vespa-deploy +++ b/config-model/src/main/perl/vespa-deploy @@ -87,7 +87,7 @@ readConfFile(); use strict; use warnings; use feature qw(switch say); -use vars qw/ $opt_c $opt_h $opt_n $opt_v $opt_f $opt_t $opt_a $opt_e $opt_E $opt_r $opt_i $opt_p $opt_H $opt_R $opt_F $opt_V /; +use vars qw/ $opt_c $opt_h $opt_n $opt_v $opt_f $opt_t $opt_a $opt_e $opt_E $opt_r $opt_i $opt_p $opt_z $opt_H $opt_R $opt_F $opt_V /; use Env qw($HOME); use JSON; use Getopt::Std; @@ -98,16 +98,12 @@ my $cloudconfig_dir = "$HOME/.cloudconfig"; my $session_id_file; my $configsource_url_used_file = "$cloudconfig_dir/deploy-configsource-url-used"; -my $curl_command = 'curl -A vespa-deploy --silent --show-error --connect-timeout 30 --max-time 1200'; -my $CURL_PUT = $curl_command . ' --write-out \%{http_code} --request PUT'; -my $CURL_GET = $curl_command . ' --request GET'; -my $GZIP = "gzip"; -my $CURL_POST_WITH_HEADERS = $curl_command . ' -i --request POST --header "Content-Type: application/x-gzip" --data-binary @- -D /tmp/http-headers'; -my $CURL_POST = $curl_command . ' --write-out \%{http_code} --request POST --header "Content-Type: application/x-gzip" --data-binary @-'; -my $CURL_POST_ZIP = $curl_command . 
' --write-out \%{http_code} --request POST --header "Content-Type: application/zip" --data-binary @-'; my $pathPrefix; +my $siaPath; +my $siaCertsPath; +my $siaKeysPath; my $tenant = "default"; my $application = "default"; my $environment = "prod"; @@ -116,8 +112,8 @@ my $instance = "default"; my $version = "v2"; my $configserver = ""; my $port = "19071"; - -getopts('c:fhnt:ve:E:r:a:i:p:HR:F:V:'); +my $cert = ""; +getopts('c:fhnt:ve:E:r:a:i:p:z:HR:F:V:'); if ($opt_h) { usage(); @@ -152,8 +148,18 @@ if ($opt_p) { $port = $opt_p; } +if ($opt_z) { + $cert = $opt_z; +} + $pathPrefix = "/application/v2/tenant/$tenant/session"; +$siaPath = "/var/lib/sia/"; + +$siaCertsPath = $siaPath . "certs/"; + +$siaKeysPath = $siaPath . "keys/"; + create_cloudconfig_dir(); $session_id_file = "$cloudconfig_dir/$tenant/deploy-session-id"; @@ -161,6 +167,17 @@ $session_id_file = "$cloudconfig_dir/$tenant/deploy-session-id"; my $command = shift; $command ||= "help"; +my $curl_command = 'curl -A vespa-deploy --silent --show-error --connect-timeout 30 --max-time 1200'; +if ($cert) { + $curl_command = $curl_command . " -k --cert " . $siaCertsPath . $cert . ".cert.pem --key " . $siaKeysPath . $cert . ".key.pem "; +} + +my $CURL_PUT = $curl_command . ' --write-out \%{http_code} --request PUT'; +my $CURL_GET = $curl_command . ' --request GET'; +my $GZIP = "gzip"; +my $CURL_POST_WITH_HEADERS = $curl_command . ' -i --request POST --header "Content-Type: application/x-gzip" --data-binary @- -D /tmp/http-headers'; +my $CURL_POST = $curl_command . ' --write-out \%{http_code} --request POST --header "Content-Type: application/x-gzip" --data-binary @-'; +my $CURL_POST_ZIP = $curl_command . 
' --write-out \%{http_code} --request POST --header "Content-Type: application/zip" --data-binary @-'; if ($command eq "upload") { my $application_package = shift; @@ -237,7 +254,7 @@ sub usage { } elsif ($command eq "fetch") { usage_fetch(); } else { - print "Usage: vespa-deploy [-h] [-v] [-f] [-t] [-p] [-V] [<command>] [args]\n"; + print "Usage: vespa-deploy [-h] [-v] [-f] [-t] [-c] [-p] [-z] [-V] [<command>] [args]\n"; print "Supported commands: 'upload', 'prepare', 'activate', 'fetch' and 'help'\n"; print "Supported options: '-h' (help), '-v' (verbose), '-f' (force/ignore validation errors), '-t' (timeout in seconds), '-p' (config server http port)\n"; print " '-h' (help)\n"; @@ -246,8 +263,10 @@ sub usage { print " '-f' (force/ignore validation errors)\n"; print " '-t <timeout>' (timeout in seconds)\n"; print " '-c <server>' (config server hostname)\n"; - print " '-p <port>' (config server http port)\n\n"; - + print " '-p <port>' (config server http port)\n"; + print " '-z <cert>' (cert/key name)\n\n"; + + print "Try 'vespa-deploy help <command>' to get more help\n"; } } @@ -328,7 +347,11 @@ sub get_configsource_url { my @configsources; if ($configserver and $configserver ne "") { - @configsources = ('http://' . $configserver . ':' . $port . '/'); + if ($cert and $cert ne "") { + @configsources = ('https://' . $configserver . ':' . $port . '/'); + } else { + @configsources = ('http://' . $configserver . ':' . $port . '/'); + } } else { @configsources = split(' ', `$VESPA_HOME/bin/vespa-print-default configservers_http`); } diff --git a/config-model/src/main/python/ES_Vespa_parser.py b/config-model/src/main/python/ES_Vespa_parser.py index 4721dbe8128..deebb1d6973 100644 --- a/config-model/src/main/python/ES_Vespa_parser.py +++ b/config-model/src/main/python/ES_Vespa_parser.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
import json import argparse diff --git a/config-model/src/test/derived/indexschema/index-info.cfg b/config-model/src/test/derived/indexschema/index-info.cfg index 2ba3a5a99ee..3a420e12a24 100644 --- a/config-model/src/test/derived/indexschema/index-info.cfg +++ b/config-model/src/test/derived/indexschema/index-info.cfg @@ -133,6 +133,16 @@ indexinfo[].command[].indexname "exact2" indexinfo[].command[].command "lowercase" indexinfo[].command[].indexname "exact2" indexinfo[].command[].command "exact @@" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "index" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "stem:BEST" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "normalize" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "plain-tokens" indexinfo[].command[].indexname "ia" indexinfo[].command[].command "index" indexinfo[].command[].indexname "ia" diff --git a/config-model/src/test/derived/indexschema/indexschema.cfg b/config-model/src/test/derived/indexschema/indexschema.cfg index d1f43c4a81e..612af087b0c 100644 --- a/config-model/src/test/derived/indexschema/indexschema.cfg +++ b/config-model/src/test/derived/indexschema/indexschema.cfg @@ -5,6 +5,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sb" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -12,6 +13,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sc" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -19,6 +21,7 @@ indexfield[].prefix false indexfield[].phrases false 
indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sd" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -26,6 +29,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sf" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -33,6 +37,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sg" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -40,6 +45,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -47,6 +53,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "si" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -54,6 +61,7 @@ indexfield[].prefix true indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "exact1" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -61,6 +69,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "exact2" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -68,6 +77,15 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 
+indexfield[].experimentalpostinglistformat false +indexfield[].name "experimental" +indexfield[].datatype STRING +indexfield[].collectiontype SINGLE +indexfield[].prefix false +indexfield[].phrases false +indexfield[].positions true +indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat true indexfield[].name "nostemstring1" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -75,6 +93,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "nostemstring2" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -82,6 +101,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "nostemstring3" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -89,6 +109,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "nostemstring4" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -96,6 +117,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "fs9" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -103,6 +125,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sd_literal" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -110,6 +133,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat 
false indexfield[].name "sh.fragment" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -117,6 +141,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.host" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -124,6 +149,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.hostname" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -131,6 +157,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.path" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -138,6 +165,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.port" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -145,6 +173,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.query" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -152,6 +181,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.scheme" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -159,6 +189,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false fieldset[].name "fs9" 
fieldset[].field[].name "se" fieldset[].name "fs1" diff --git a/config-model/src/test/derived/indexschema/indexschema.sd b/config-model/src/test/derived/indexschema/indexschema.sd index c28711813c3..44956f30e9e 100644 --- a/config-model/src/test/derived/indexschema/indexschema.sd +++ b/config-model/src/test/derived/indexschema/indexschema.sd @@ -56,6 +56,10 @@ search indexschema { exact } } + field experimental type string { + indexing: index + index: experimental-posting-list-format + } # integer fields field ia type int { diff --git a/config-model/src/test/derived/indexschema/vsmfields.cfg b/config-model/src/test/derived/indexschema/vsmfields.cfg index b6c50869e34..30ed67f61b7 100644 --- a/config-model/src/test/derived/indexschema/vsmfields.cfg +++ b/config-model/src/test/derived/indexschema/vsmfields.cfg @@ -55,6 +55,11 @@ fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX +fieldspec[].name "experimental" +fieldspec[].searchmethod AUTOUTF8 +fieldspec[].arg1 "" +fieldspec[].maxlength 1048576 +fieldspec[].fieldtype INDEX fieldspec[].name "ia" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" @@ -133,6 +138,8 @@ documenttype[].index[].name "exact1" documenttype[].index[].field[].name "exact1" documenttype[].index[].name "exact2" documenttype[].index[].field[].name "exact2" +documenttype[].index[].name "experimental" +documenttype[].index[].field[].name "experimental" documenttype[].index[].name "ia" documenttype[].index[].field[].name "ia" documenttype[].index[].name "ib" diff --git a/config-model/src/test/derived/uri_array/indexschema.cfg b/config-model/src/test/derived/uri_array/indexschema.cfg index 8593c594e3c..1a556daf558 100644 --- a/config-model/src/test/derived/uri_array/indexschema.cfg +++ b/config-model/src/test/derived/uri_array/indexschema.cfg @@ -5,6 +5,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 
+indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.fragment" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -12,6 +13,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.host" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -19,6 +21,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.hostname" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -26,6 +29,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.path" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -33,6 +37,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.port" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -40,6 +45,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.query" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -47,6 +53,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.scheme" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -54,3 +61,4 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 
+indexfield[].experimentalpostinglistformat false diff --git a/config-model/src/test/derived/uri_wset/indexschema.cfg b/config-model/src/test/derived/uri_wset/indexschema.cfg index a432556bb6b..7fe7f7a4941 100644 --- a/config-model/src/test/derived/uri_wset/indexschema.cfg +++ b/config-model/src/test/derived/uri_wset/indexschema.cfg @@ -5,6 +5,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.fragment" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -12,6 +13,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.host" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -19,6 +21,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.hostname" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -26,6 +29,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.path" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -33,6 +37,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.port" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -40,6 +45,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.query" 
indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -47,6 +53,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.scheme" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -54,3 +61,4 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/MemoryCache.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/MemoryCache.java index 0fe7fe01467..80bca85d9e4 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/MemoryCache.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/MemoryCache.java @@ -40,7 +40,8 @@ public class MemoryCache { * @param config config to put in cache */ public void put(RawConfig config) { - if (config.isError()) return; + // Do not cache errors or empty configs (which have generation 0) + if (config.isError() || config.getGeneration() == 0) return; log.log(LogLevel.DEBUG, () -> "Putting '" + config + "' into memory cache"); cache.put(new ConfigCacheKey(config.getKey(), config.getDefMd5()), config); diff --git a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/RpcConfigSourceClient.java b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/RpcConfigSourceClient.java index b4eda05fde4..9306220f518 100644 --- a/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/RpcConfigSourceClient.java +++ b/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/RpcConfigSourceClient.java @@ -132,7 +132,7 @@ class RpcConfigSourceClient implements ConfigSourceClient { ret = cachedConfig; } } - if (!cachedConfig.isError()) { + if (!cachedConfig.isError() && cachedConfig.getGeneration() > 0) { needToGetConfig = false; } } diff --git 
a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/ProxyServerTest.java b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/ProxyServerTest.java index 22488da7c80..e21df82a036 100644 --- a/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/ProxyServerTest.java +++ b/config-proxy/src/test/java/com/yahoo/vespa/config/proxy/ProxyServerTest.java @@ -171,6 +171,40 @@ public class ProxyServerTest { assertEquals(1, cacheManager.size()); } + /** + * Verifies that config with generation 0 (used for empty sentinel config) is not cached. + * If it was cached, it would be served even when newer config is available + * (as they ask for config, saying that it now has config with generation 0). + * When the config has been successfully retrieved it must be put in the cache. + */ + @Test + public void testNoCachingOfEmptyConfig() { + ConfigTester tester = new ConfigTester(); + // Simulate an empty response + RawConfig emptyConfig = new RawConfig(fooConfig.getKey(), fooConfig.getDefMd5(), + Payload.from("{}"), fooConfig.getConfigMd5(), + 0, false, 0, fooConfig.getDefContent(), Optional.empty()); + source.put(fooConfig.getKey(), emptyConfig); + + MemoryCache cache = proxy.getMemoryCache(); + assertEquals(0, cache.size()); + + RawConfig res = proxy.resolveConfig(tester.createRequest(fooConfig)); + assertNotNull(res.getPayload()); + assertThat(res.getPayload().toString(), is(emptyConfig.getPayload().toString())); + assertEquals(0, cache.size()); + + // Put a version of the same config into backend with new generation and see that it now works (i.e. 
we are + // not getting a cached response (of the error in the previous request) + source.put(fooConfig.getKey(), createConfigWithNextConfigGeneration(fooConfig, 0)); + + // Verify that we get the config now and that it is cached + res = proxy.resolveConfig(tester.createRequest(fooConfig)); + assertNotNull(res.getPayload().getData()); + assertThat(res.getPayload().toString(), is(ConfigTester.fooPayload.toString())); + assertEquals(1, cache.size()); + } + @Test public void testReconfiguration() { ConfigTester tester = new ConfigTester(); diff --git a/configdefinitions/src/vespa/indexschema.def b/configdefinitions/src/vespa/indexschema.def index 7153164bfab..c0c9f175837 100644 --- a/configdefinitions/src/vespa/indexschema.def +++ b/configdefinitions/src/vespa/indexschema.def @@ -16,6 +16,8 @@ indexfield[].phrases bool default=false indexfield[].positions bool default=true ## Average element length indexfield[].averageelementlen int default=512 +## Whether we should use a new experimental posting list format for this field. +indexfield[].experimentalpostinglistformat bool default=false ## The name of the field collection (aka logical view). 
fieldset[].name string diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/MemoryNameService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/MemoryNameService.java index 80b3a0bf25c..75d49e542dc 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/MemoryNameService.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/MemoryNameService.java @@ -22,10 +22,19 @@ public class MemoryNameService implements NameService { return Collections.unmodifiableSet(records); } + private void add(Record record) { + if (records.stream().anyMatch(r -> r.type().equals(record.type()) && + r.name().equals(record.name()) && + r.data().equals(record.data()))) { + throw new IllegalArgumentException("Record already exists: " + record); + } + records.add(record); + } + @Override public Record createCname(RecordName name, RecordData canonicalName) { var record = new Record(Record.Type.CNAME, name, canonicalName); - records.add(record); + add(record); return record; } @@ -37,7 +46,7 @@ public class MemoryNameService implements NameService { .collect(Collectors.toList()); // Satisfy idempotency contract of interface removeRecords(findRecords(Record.Type.ALIAS, name)); - this.records.addAll(records); + records.forEach(this::add); return records; } @@ -46,7 +55,7 @@ public class MemoryNameService implements NameService { var records = txtData.stream() .map(data -> new Record(Record.Type.TXT, name, data)) .collect(Collectors.toList()); - this.records.addAll(records); + records.forEach(this::add); return records; } @@ -76,7 +85,7 @@ public class MemoryNameService implements NameService { } var existing = records.get(0); this.records.remove(existing); - this.records.add(new Record(existing.type(), existing.name(), newData)); + add(new Record(existing.type(), existing.name(), newData)); } @Override diff --git 
a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/Record.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/Record.java index a4219569ab1..7beb3076156 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/Record.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/dns/Record.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.api.integration.dns; +import java.time.Duration; import java.util.Comparator; import java.util.Objects; @@ -16,20 +17,31 @@ public class Record implements Comparable<Record> { .thenComparing(Record::data); private final Type type; + private final Duration ttl; private final RecordName name; private final RecordData data; - public Record(Type type, RecordName name, RecordData data) { + public Record(Type type, Duration ttl, RecordName name, RecordData data) { this.type = Objects.requireNonNull(type, "type cannot be null"); + this.ttl = Objects.requireNonNull(ttl, "ttl cannot be null"); this.name = Objects.requireNonNull(name, "name cannot be null"); this.data = Objects.requireNonNull(data, "data cannot be null"); } + public Record(Type type, RecordName name, RecordData data) { + this(type, Duration.ofMinutes(5), name, data); + } + /** DNS type of this */ public Type type() { return type; } + /** The TTL value of this */ + public Duration ttl() { + return ttl; + } + /** Data in this, e.g. 
IP address for records of type A */ public RecordData data() { return data; @@ -58,7 +70,7 @@ public class Record implements Comparable<Record> { @Override public String toString() { - return String.format("%s %s -> %s", type, name, data); + return String.format("%s %s -> %s [TTL: %s]", type, name, data, ttl); } @Override @@ -67,13 +79,14 @@ public class Record implements Comparable<Record> { if (o == null || getClass() != o.getClass()) return false; Record record = (Record) o; return type == record.type && + ttl.equals(record.ttl) && name.equals(record.name) && data.equals(record.data); } @Override public int hashCode() { - return Objects.hash(type, name, data); + return Objects.hash(type, ttl, name, data); } @Override diff --git a/controller-server/pom.xml b/controller-server/pom.xml index f22142db727..3e7247bd44b 100644 --- a/controller-server/pom.xml +++ b/controller-server/pom.xml @@ -131,6 +131,12 @@ <version>${project.version}</version> </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>hosted-api</artifactId> + <version>${project.version}</version> + </dependency> + <!-- test --> <dependency> diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/RoutingPolicy.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/RoutingPolicy.java index 2fc852d79d5..e0145e6b94c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/RoutingPolicy.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/RoutingPolicy.java @@ -85,16 +85,12 @@ public class RoutingPolicy { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RoutingPolicy policy = (RoutingPolicy) o; - return owner.equals(policy.owner) && - cluster.equals(policy.cluster) && - zone.equals(policy.zone) && - canonicalName.equals(policy.canonicalName) && - dnsZone.equals(policy.dnsZone); + return 
canonicalName.equals(policy.canonicalName); } @Override public int hashCode() { - return Objects.hash(owner, cluster, zone, canonicalName, dnsZone); + return Objects.hash(canonicalName); } @Override diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/dns/RemoveRecords.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/dns/RemoveRecords.java index 2195a527679..b721f66e452 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/dns/RemoveRecords.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/dns/RemoveRecords.java @@ -67,7 +67,7 @@ public class RemoveRecords implements NameServiceRequest { @Override public String toString() { - return "remove records of type " + type + " by " + + return "remove records of type " + type + ", by " + name.map(n -> "name " + n).orElse("") + data.map(d -> "data " + d).orElse(""); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainer.java index c52306266a4..7e0032f03c5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainer.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.api.integration.dns.NameService; import com.yahoo.vespa.hosted.controller.api.integration.dns.Record; import com.yahoo.vespa.hosted.controller.api.integration.dns.RecordData; import com.yahoo.vespa.hosted.controller.dns.NameServiceQueue.Priority; @@ -11,7 +12,12 @@ import com.yahoo.vespa.hosted.controller.rotation.RotationLock; import com.yahoo.vespa.hosted.controller.rotation.RotationRepository; import java.time.Duration; +import java.util.ArrayList; 
+import java.util.Collection; +import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; /** * Performs DNS maintenance tasks such as removing DNS aliases for unassigned rotations. @@ -20,6 +26,8 @@ import java.util.Map; */ public class DnsMaintainer extends Maintainer { + private final AtomicInteger rotationIndex = new AtomicInteger(0); + public DnsMaintainer(Controller controller, Duration interval, JobControl jobControl) { super(controller, interval, jobControl); } @@ -32,7 +40,7 @@ public class DnsMaintainer extends Maintainer { protected void maintain() { try (RotationLock lock = rotationRepository().lock()) { Map<RotationId, Rotation> unassignedRotations = rotationRepository().availableRotations(lock); - unassignedRotations.values().forEach(this::removeCname); + rotationToCheckOf(unassignedRotations.values()).ifPresent(this::removeCname); } } @@ -42,4 +50,20 @@ public class DnsMaintainer extends Maintainer { controller().nameServiceForwarder().removeRecords(Record.Type.CNAME, RecordData.fqdn(rotation.name()), Priority.normal); } + /** + * Returns the rotation that should be checked in this run. We check only one rotation per run to avoid running into + * rate limits that may be imposed by the {@link NameService} implementation. 
+ */ + private Optional<Rotation> rotationToCheckOf(Collection<Rotation> rotations) { + if (rotations.isEmpty()) return Optional.empty(); + List<Rotation> rotationList = new ArrayList<>(rotations); + int index = rotationIndex.getAndUpdate((i) -> { + if (i < rotationList.size() - 1) { + return ++i; + } + return 0; + }); + return Optional.of(rotationList.get(index)); + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainer.java index bb74a6a2ddb..0ddc24147ee 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainer.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.RotationName; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.log.LogLevel; @@ -25,6 +24,7 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -34,7 +34,7 @@ import java.util.logging.Logger; import java.util.stream.Collectors; /** - * Maintains DNS records as defined by routing policies for all exclusive load balancers in this system. + * Maintains routing policies and their DNS records for all exclusive load balancers in this system. 
* * @author mortent * @author mpolden @@ -53,15 +53,21 @@ public class RoutingPolicyMaintainer extends Maintainer { super(controller, interval, jobControl); this.nameServiceForwarder = controller.nameServiceForwarder(); this.db = db; + // Update serialized format + try (Lock lock = db.lockRoutingPolicies()) { + for (var policy : db.readRoutingPolicies().entrySet()) { + db.writeRoutingPolicies(policy.getKey(), policy.getValue()); + } + } } @Override protected void maintain() { Map<DeploymentId, List<LoadBalancer>> loadBalancers = findLoadBalancers(); - removeObsoleteAliases(loadBalancers); - registerCnames(loadBalancers); - removeObsoleteCnames(loadBalancers); - registerAliases(); + removeObsoleteEndpointsFromDns(loadBalancers); + storePolicies(loadBalancers); + removeObsoletePolicies(loadBalancers); + registerEndpointsInDns(); } /** Find all exclusive load balancers in this system, grouped by deployment */ @@ -83,8 +89,8 @@ public class RoutingPolicyMaintainer extends Maintainer { return Collections.unmodifiableMap(result); } - /** Create aliases (global rotations) for all current routing policies */ - private void registerAliases() { + /** Create global endpoints for all current routing policies */ + private void registerEndpointsInDns() { try (Lock lock = db.lockRoutingPolicies()) { Map<RoutingId, List<RoutingPolicy>> routingTable = routingTableFrom(db.readRoutingPolicies()); @@ -109,8 +115,8 @@ public class RoutingPolicyMaintainer extends Maintainer { } } - /** Create CNAME records for each individual load balancers */ - private void registerCnames(Map<DeploymentId, List<LoadBalancer>> loadBalancers) { + /** Store routing policies for all load balancers */ + private void storePolicies(Map<DeploymentId, List<LoadBalancer>> loadBalancers) { for (Map.Entry<DeploymentId, List<LoadBalancer>> entry : loadBalancers.entrySet()) { ApplicationId application = entry.getKey().applicationId(); ZoneId zone = entry.getKey().zoneId(); @@ -118,7 +124,7 @@ public class 
RoutingPolicyMaintainer extends Maintainer { Set<RoutingPolicy> policies = new LinkedHashSet<>(db.readRoutingPolicies(application)); for (LoadBalancer loadBalancer : entry.getValue()) { try { - RoutingPolicy policy = registerCname(application, zone, loadBalancer); + RoutingPolicy policy = storePolicy(application, zone, loadBalancer); if (!policies.add(policy)) { policies.remove(policy); policies.add(policy); @@ -134,8 +140,8 @@ public class RoutingPolicyMaintainer extends Maintainer { } } - /** Register DNS alias for given load balancer */ - private RoutingPolicy registerCname(ApplicationId application, ZoneId zone, LoadBalancer loadBalancer) { + /** Store policy for given load balancer and request a CNAME for it */ + private RoutingPolicy storePolicy(ApplicationId application, ZoneId zone, LoadBalancer loadBalancer) { RoutingPolicy routingPolicy = new RoutingPolicy(application, loadBalancer.cluster(), zone, loadBalancer.hostname(), loadBalancer.dnsZone(), loadBalancer.rotations()); @@ -145,26 +151,33 @@ public class RoutingPolicyMaintainer extends Maintainer { return routingPolicy; } - /** Remove all DNS records that point to non-existing load balancers */ - private void removeObsoleteCnames(Map<DeploymentId, List<LoadBalancer>> loadBalancers) { + /** Remove obsolete policies and their CNAME records */ + private void removeObsoletePolicies(Map<DeploymentId, List<LoadBalancer>> loadBalancers) { try (Lock lock = db.lockRoutingPolicies()) { - List<RoutingPolicy> removalCandidates = new ArrayList<>(db.readRoutingPolicies()); - Set<HostName> activeLoadBalancers = loadBalancers.values().stream() - .flatMap(Collection::stream) - .map(LoadBalancer::hostname) - .collect(Collectors.toSet()); - - // Remove any active load balancers + var allPolicies = new HashMap<>(db.readRoutingPolicies()); + var removalCandidates = allPolicies.values().stream() + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + var activeLoadBalancers = loadBalancers.values().stream() + 
.flatMap(Collection::stream) + .map(LoadBalancer::hostname) + .collect(Collectors.toSet()); + // Keep active load balancers by removing them from candidates removalCandidates.removeIf(policy -> activeLoadBalancers.contains(policy.canonicalName())); - for (RoutingPolicy policy : removalCandidates) { - String dnsName = policy.endpointIn(controller().system()).dnsName(); + for (var policy : removalCandidates) { + var dnsName = policy.endpointIn(controller().system()).dnsName(); nameServiceForwarder.removeRecords(Record.Type.CNAME, RecordName.from(dnsName), Priority.normal); + // Remove stale policy from curator + var updatedPolicies = new LinkedHashSet<>(allPolicies.getOrDefault(policy.owner(), Set.of())); + updatedPolicies.remove(policy); + allPolicies.put(policy.owner(), updatedPolicies); + db.writeRoutingPolicies(policy.owner(), updatedPolicies); } } } - /** Remove global rotations that are not referenced by given load balancers */ - private void removeObsoleteAliases(Map<DeploymentId, List<LoadBalancer>> loadBalancers) { + /** Remove DNS for global endpoints not referenced by given load balancers */ + private void removeObsoleteEndpointsFromDns(Map<DeploymentId, List<LoadBalancer>> loadBalancers) { try (Lock lock = db.lockRoutingPolicies()) { Set<RoutingId> removalCandidates = routingTableFrom(db.readRoutingPolicies()).keySet(); Set<RoutingId> activeRoutingIds = routingIdsFrom(loadBalancers); @@ -189,12 +202,13 @@ public class RoutingPolicyMaintainer extends Maintainer { return Collections.unmodifiableSet(routingIds); } - /** Compute a routing table from given policies. 
*/ - private static Map<RoutingId, List<RoutingPolicy>> routingTableFrom(Set<RoutingPolicy> routingPolicies) { - Map<RoutingId, List<RoutingPolicy>> routingTable = new LinkedHashMap<>(); - for (RoutingPolicy policy : routingPolicies) { - for (RotationName rotation : policy.rotations()) { - RoutingId id = new RoutingId(policy.owner(), rotation); + /** Compute a routing table from given policies */ + private static Map<RoutingId, List<RoutingPolicy>> routingTableFrom(Map<ApplicationId, Set<RoutingPolicy>> routingPolicies) { + var flattenedPolicies = routingPolicies.values().stream().flatMap(Collection::stream).collect(Collectors.toSet()); + var routingTable = new LinkedHashMap<RoutingId, List<RoutingPolicy>>(); + for (var policy : flattenedPolicies) { + for (var rotation : policy.rotations()) { + var id = new RoutingId(policy.owner(), rotation); routingTable.compute(id, (k, policies) -> { if (policies == null) { policies = new ArrayList<>(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java index 142907ea6c3..62d401cf478 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgrader.java @@ -54,6 +54,7 @@ public class SystemUpgrader extends InfrastructureUpgrader { protected Optional<Version> targetVersion() { return controller().versionStatus().controllerVersion() .filter(vespaVersion -> !vespaVersion.isSystemVersion()) + .filter(vespaVersion -> vespaVersion.confidence() != VespaVersion.Confidence.broken) .map(VespaVersion::versionNumber); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index 
1d9840ffa56..a2e5a0c78f7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -495,11 +495,10 @@ public class CuratorDb { curator.set(routingPolicyPath(application), asJson(routingPolicySerializer.toSlime(policies))); } - public Set<RoutingPolicy> readRoutingPolicies() { + public Map<ApplicationId, Set<RoutingPolicy>> readRoutingPolicies() { return curator.getChildren(routingPoliciesRoot).stream() .map(ApplicationId::fromSerializedForm) - .flatMap(application -> readRoutingPolicies(application).stream()) - .collect(Collectors.toUnmodifiableSet()); + .collect(Collectors.toUnmodifiableMap(Function.identity(), this::readRoutingPolicies)); } public Set<RoutingPolicy> readRoutingPolicies(ApplicationId application) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializer.java index 0b37638b29b..a9c6c54a44a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializer.java @@ -8,7 +8,6 @@ import com.yahoo.config.provision.RotationName; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Cursor; -import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; import com.yahoo.vespa.hosted.controller.application.RoutingPolicy; @@ -58,7 +57,7 @@ public class RoutingPolicySerializer { Set<RotationName> rotations = new LinkedHashSet<>(); inspect.field(rotationsField).traverse((ArrayTraverser) (j, rotation) -> rotations.add(RotationName.from(rotation.asString()))); policies.add(new RoutingPolicy(owner, - 
clusterId(inspect.field(clusterField)), + ClusterSpec.Id.from(inspect.field(clusterField).asString()), ZoneId.from(inspect.field(zoneField).asString()), HostName.from(inspect.field(canonicalNameField).asString()), Serializers.optionalField(inspect.field(dnsZoneField), Function.identity()), @@ -67,10 +66,4 @@ public class RoutingPolicySerializer { return Collections.unmodifiableSet(policies); } - // TODO: Remove and inline after Vespa 7.43 - private static ClusterSpec.Id clusterId(Inspector field) { - return Serializers.optionalField(field, ClusterSpec.Id::from).orElseGet(() -> new ClusterSpec.Id("default")); - } - - } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java index d01e2f2497f..2b0ee741e7e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java @@ -74,7 +74,7 @@ import static org.junit.Assert.assertNotNull; */ public final class ControllerTester { - private static final int availableRotations = 10; + public static final int availableRotations = 10; private final AthenzDbMock athenzDb; private final ManualClock clock; diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java index e40ea49bd8c..13218cc2442 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java @@ -6,12 +6,17 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.zone.ZoneId; import 
com.yahoo.vespa.hosted.controller.Application; +import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.api.integration.dns.Record; +import com.yahoo.vespa.hosted.controller.api.integration.dns.RecordData; import com.yahoo.vespa.hosted.controller.api.integration.dns.RecordName; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; +import com.yahoo.vespa.hosted.controller.application.Endpoint; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; import com.yahoo.vespa.hosted.controller.persistence.MockCuratorDb; +import com.yahoo.vespa.hosted.controller.rotation.Rotation; +import com.yahoo.vespa.hosted.controller.rotation.RotationId; import org.junit.Before; import org.junit.Test; @@ -96,15 +101,41 @@ public class DnsMaintainerTest { tester.controllerTester().deleteApplication(application.id()); // DnsMaintainer removes records - maintainer.maintain(); + for (int i = 0; i < ControllerTester.availableRotations; i++) { + maintainer.maintain(); + } tester.updateDns(); assertFalse("DNS record removed", findCname.apply("app1--tenant1.global.vespa.yahooapis.com").isPresent()); assertFalse("DNS record removed", findCname.apply("app1--tenant1.global.vespa.oath.cloud").isPresent()); assertFalse("DNS record removed", findCname.apply("app1.tenant1.global.vespa.yahooapis.com").isPresent()); } + @Test + public void rate_limit_record_removal() { + // Create stale records + int staleTotal = ControllerTester.availableRotations; + for (int i = 1; i <= staleTotal; i++) { + Rotation r = rotation(i); + tester.controllerTester().nameService().createCname(RecordName.from("stale-record-" + i + "." 
+ + Endpoint.OATH_DNS_SUFFIX), + RecordData.from(r.name() + ".")); + } + + // One record is removed per run + for (int i = 1; i <= staleTotal*2; i++) { + maintainer.run(); + tester.updateDns(); + assertEquals(Math.max(staleTotal - i, 0), records().size()); + } + } + private Set<Record> records() { return tester.controllerTester().nameService().records(); } + private static Rotation rotation(int n) { + String id = String.format("%02d", n); + return new Rotation(new RotationId("rotation-id-" + id), "rotation-fqdn-" + id); + } + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainerTest.java index 721ee206f30..14d5dc4e7c3 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/RoutingPolicyMaintainerTest.java @@ -196,6 +196,8 @@ public class RoutingPolicyMaintainerTest { "c1.app1.tenant1.us-central-1.vespa.oath.cloud" ); assertEquals(expectedRecords, recordNames()); + assertTrue("Removes stale routing policies " + app2, tester.controller().applications().routingPolicies(app2.id()).isEmpty()); + assertEquals("Keeps routing policies for " + app1, 4, tester.controller().applications().routingPolicies(app1.id()).size()); } private void maintain() { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java index 001e7b736bb..7f558131dd0 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java @@ -8,6 +8,7 @@ import 
com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import org.junit.Before; import org.junit.Test; @@ -247,6 +248,40 @@ public class SystemUpgraderTest { assertWantedVersion(SystemApplication.zone, version, zone1); } + @Test + public void upgrade_halts_on_broken_version() { + SystemUpgrader systemUpgrader = systemUpgrader(UpgradePolicy.create().upgrade(zone1).upgrade(zone2)); + + // Initial system version + Version version1 = Version.fromString("6.5"); + tester.upgradeSystem(version1); + systemUpgrader.maintain(); + assertCurrentVersion(List.of(SystemApplication.configServerHost, SystemApplication.proxyHost, + SystemApplication.configServer, SystemApplication.zone), + version1, zone1); + assertCurrentVersion(List.of(SystemApplication.configServerHost, SystemApplication.proxyHost, + SystemApplication.configServer, SystemApplication.zone), + version1, zone2); + + // System starts upgrading to next version + Version version2 = Version.fromString("6.6"); + tester.upgradeController(version2); + systemUpgrader.maintain(); + completeUpgrade(List.of(SystemApplication.configServerHost, SystemApplication.proxyHost), version2, zone1); + systemUpgrader.maintain(); + completeUpgrade(SystemApplication.configServer, version2, zone1); + systemUpgrader.maintain(); + completeUpgrade(SystemApplication.zone, version2, zone1); + convergeServices(SystemApplication.zone, zone1); + + // Confidence is reduced to broken and next zone is not scheduled for upgrade + tester.upgrader().overrideConfidence(version2, VespaVersion.Confidence.broken); + tester.computeVersionStatus(); + systemUpgrader.maintain(); + assertWantedVersion(List.of(SystemApplication.configServerHost, SystemApplication.proxyHost, + 
SystemApplication.configServer, SystemApplication.zone), version1, zone2); + } + /** Simulate upgrade of nodes allocated to given application. In a real system this is done by the node itself */ private void completeUpgrade(SystemApplication application, Version version, ZoneId... zones) { assertWantedVersion(application, version, zones); @@ -306,7 +341,7 @@ public class SystemUpgraderTest { private void assertVersion(SystemApplication application, Version version, Function<Node, Version> versionField, ZoneId... zones) { - for (ZoneId zone : zones) { + for (ZoneId zone : requireNonEmpty(zones)) { for (Node node : listNodes(zone, application)) { assertEquals(application + " version", version, versionField.apply(node)); } @@ -329,4 +364,9 @@ public class SystemUpgraderTest { new JobControl(tester.controllerTester().curator())); } + private static <T> T[] requireNonEmpty(T[] args) { + if (args.length == 0) throw new IllegalArgumentException("Need at least one argument"); + return args; + } + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializerTest.java index 4a4fd39ccb7..a0e95bd0393 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RoutingPolicySerializerTest.java @@ -7,10 +7,10 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.RotationName; import com.yahoo.config.provision.zone.ZoneId; -import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.application.RoutingPolicy; import org.junit.Test; +import java.util.Iterator; import java.util.Optional; import java.util.Set; @@ -25,48 +25,32 @@ public class RoutingPolicySerializerTest { @Test 
public void test_serialization() { - ApplicationId owner = ApplicationId.defaultId(); - Set<RotationName> rotations = Set.of(RotationName.from("r1"), RotationName.from("r2")); - Set<RoutingPolicy> loadBalancers = ImmutableSet.of(new RoutingPolicy(owner, - ClusterSpec.Id.from("my-cluster1"), - ZoneId.from("prod", "us-north-1"), - HostName.from("long-and-ugly-name"), - Optional.of("zone1"), - rotations), - new RoutingPolicy(owner, - ClusterSpec.Id.from("my-cluster2"), - ZoneId.from("prod", "us-north-2"), - HostName.from("long-and-ugly-name-2"), - Optional.empty(), - rotations)); - Set<RoutingPolicy> serialized = serializer.fromSlime(owner, serializer.toSlime(loadBalancers)); - assertEquals(loadBalancers, serialized); - } - - @Test - public void test_legacy_serialization() { // TODO: Remove after 7.43 has been released - String json = "{\n" + - " \"routingPolicies\": [\n" + - " {\n" + - " \"alias\": \"my-pretty-alias\",\n" + - " \"zone\": \"prod.us-north-1\",\n" + - " \"canonicalName\": \"long-and-ugly-name\",\n" + - " \"dnsZone\": \"zone1\",\n" + - " \"rotations\": [\n" + - " \"r1\",\n" + - " \"r2\"\n" + - " ]\n" + - " }\n" + - " ]\n" + - "}"; - ApplicationId owner = ApplicationId.defaultId(); - Set<RoutingPolicy> expected = Set.of(new RoutingPolicy(owner, - ClusterSpec.Id.from("default"), - ZoneId.from("prod", "us-north-1"), - HostName.from("long-and-ugly-name"), - Optional.of("zone1"), - Set.of(RotationName.from("r1"), RotationName.from("r2")))); - assertEquals(expected, serializer.fromSlime(owner, SlimeUtils.jsonToSlime(json))); + var owner = ApplicationId.defaultId(); + var rotations = Set.of(RotationName.from("r1"), RotationName.from("r2")); + var policies = ImmutableSet.of(new RoutingPolicy(owner, + ClusterSpec.Id.from("my-cluster1"), + ZoneId.from("prod", "us-north-1"), + HostName.from("long-and-ugly-name"), + Optional.of("zone1"), + rotations), + new RoutingPolicy(owner, + ClusterSpec.Id.from("my-cluster2"), + ZoneId.from("prod", "us-north-2"), + 
HostName.from("long-and-ugly-name-2"), + Optional.empty(), + rotations)); + var serialized = serializer.fromSlime(owner, serializer.toSlime(policies)); + assertEquals(policies.size(), serialized.size()); + for (Iterator<RoutingPolicy> it1 = policies.iterator(), it2 = serialized.iterator(); it1.hasNext();) { + var expected = it1.next(); + var actual = it2.next(); + assertEquals(expected.owner(), actual.owner()); + assertEquals(expected.cluster(), actual.cluster()); + assertEquals(expected.zone(), actual.zone()); + assertEquals(expected.canonicalName(), actual.canonicalName()); + assertEquals(expected.dnsZone(), actual.dnsZone()); + assertEquals(expected.rotations(), actual.rotations()); + } } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java index a7a28591d62..7032112a860 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java @@ -1,6 +1,7 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.controller.restapi.application; +import ai.vespa.hosted.api.MultiPartStreamer; import com.yahoo.application.container.handler.Request; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; @@ -58,6 +59,7 @@ import com.yahoo.vespa.hosted.controller.restapi.ContainerControllerTester; import com.yahoo.vespa.hosted.controller.restapi.ContainerTester; import com.yahoo.vespa.hosted.controller.restapi.ControllerContainerTest; import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant; +import com.yahoo.yolean.Exceptions; import org.apache.http.HttpEntity; import org.apache.http.entity.ContentType; import org.apache.http.entity.mime.MultipartEntityBuilder; @@ -1417,16 +1419,12 @@ public class ApplicationApiTest extends ControllerContainerTest { return builder.build(); } - private HttpEntity createApplicationSubmissionData(ApplicationPackage applicationPackage) { - MultipartEntityBuilder builder = MultipartEntityBuilder.create(); - builder.addTextBody(EnvironmentResource.SUBMIT_OPTIONS, - "{\"repository\":\"repo\",\"branch\":\"master\",\"commit\":\"d00d\",\"authorEmail\":\"a@b\"}", - ContentType.APPLICATION_JSON); - builder.addBinaryBody(EnvironmentResource.APPLICATION_ZIP, applicationPackage.zippedContent()); - builder.addBinaryBody(EnvironmentResource.APPLICATION_TEST_ZIP, "content".getBytes()); - return builder.build(); + private MultiPartStreamer createApplicationSubmissionData(ApplicationPackage applicationPackage) { + return new MultiPartStreamer().addJson(EnvironmentResource.SUBMIT_OPTIONS, "{\"repository\":\"repo\",\"branch\":\"master\",\"commit\":\"d00d\",\"authorEmail\":\"a@b\"}") + .addBytes(EnvironmentResource.APPLICATION_ZIP, applicationPackage.zippedContent()) + .addBytes(EnvironmentResource.APPLICATION_TEST_ZIP, "content".getBytes()); } - + private String deployOptions(boolean deployDirectly, Optional<ApplicationVersion> applicationVersion) { return "{\"vespaVersion\":null," + 
"\"ignoreValidationErrors\":false," + @@ -1629,6 +1627,9 @@ public class ApplicationApiTest extends ControllerContainerTest { private RequestBuilder data(byte[] data) { this.data = data; return this; } private RequestBuilder data(String data) { return data(data.getBytes(StandardCharsets.UTF_8)); } + private RequestBuilder data(MultiPartStreamer streamer) { + return Exceptions.uncheck(() -> data(streamer.data().readAllBytes()).contentType(streamer.contentType())); + } private RequestBuilder data(HttpEntity data) { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { diff --git a/fnet/src/vespa/fnet/connection.h b/fnet/src/vespa/fnet/connection.h index b4272b91cef..760b1b96d4d 100644 --- a/fnet/src/vespa/fnet/connection.h +++ b/fnet/src/vespa/fnet/connection.h @@ -58,9 +58,9 @@ public: }; enum { - FNET_READ_SIZE = 16384, + FNET_READ_SIZE = 32768, FNET_READ_REDO = 10, - FNET_WRITE_SIZE = 16384, + FNET_WRITE_SIZE = 32768, FNET_WRITE_REDO = 10 }; diff --git a/hosted-api/OWNERS b/hosted-api/OWNERS new file mode 100644 index 00000000000..d0a102ecbf4 --- /dev/null +++ b/hosted-api/OWNERS @@ -0,0 +1 @@ +jonmv diff --git a/hosted-api/README.md b/hosted-api/README.md new file mode 100644 index 00000000000..28eea5c3f3d --- /dev/null +++ b/hosted-api/README.md @@ -0,0 +1 @@ +# Hosted Vespa controller API miscellaneous
\ No newline at end of file diff --git a/hosted-api/pom.xml b/hosted-api/pom.xml new file mode 100644 index 00000000000..f20244a8816 --- /dev/null +++ b/hosted-api/pom.xml @@ -0,0 +1,44 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>parent</artifactId> + <groupId>com.yahoo.vespa</groupId> + <version>7-SNAPSHOT</version> + <relativePath>../parent/pom.xml</relativePath> + </parent> + <artifactId>hosted-api</artifactId> + <description>Miscellaneous for tenant client -- hosted Vespa controller communication</description> + + <dependencies> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>security-utils</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>config-provisioning</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>vespajlib</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.12</version> + <scope>test</scope> + </dependency> + </dependencies> +</project> + diff --git a/hosted-api/src/main/java/ai/vespa/hosted/api/Method.java b/hosted-api/src/main/java/ai/vespa/hosted/api/Method.java new file mode 100644 index 00000000000..ff7c1e4270b --- /dev/null +++ b/hosted-api/src/main/java/ai/vespa/hosted/api/Method.java @@ -0,0 +1,16 @@ +package ai.vespa.hosted.api; + +/** + * HTTP methods. 
+ * + * @author jonmv + */ +public enum Method { + + GET, + PUT, + POST, + PATCH, + DELETE; + +} diff --git a/hosted-api/src/main/java/ai/vespa/hosted/api/MultiPartStreamer.java b/hosted-api/src/main/java/ai/vespa/hosted/api/MultiPartStreamer.java new file mode 100644 index 00000000000..0dde6fd3bde --- /dev/null +++ b/hosted-api/src/main/java/ai/vespa/hosted/api/MultiPartStreamer.java @@ -0,0 +1,156 @@ +package ai.vespa.hosted.api; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.SequenceInputStream; +import java.io.UncheckedIOException; +import java.net.http.HttpRequest; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Used to create builders for multi part http body entities, which stream their data. + * + * @author jonmv + */ +public class MultiPartStreamer { + + private final String boundary; + private final List<Supplier<InputStream>> streams; + + MultiPartStreamer(String boundary) { + this.boundary = boundary; + this.streams = new ArrayList<>(); + } + + /** Creates a new MultiPartBodyStreamer in which parts can be aggregated, and then streamed. */ + public MultiPartStreamer() { + this(UUID.randomUUID().toString()); + } + + /** Adds the given data as a named part in this, using the {@code "text/plain"} content type. */ + public MultiPartStreamer addText(String name, String text) { + return addData(name, "text/plain", text); + } + + /** Adds the given data as a named part in this, using the {@code "application/json"} content type. 
*/ + public MultiPartStreamer addJson(String name, String json) { + return addData(name, "application/json", json); + } + + /** Adds the given data as a named part in this, using the given content type. */ + public MultiPartStreamer addData(String name, String type, String data) { + streams.add(() -> separator(name, type)); + streams.add(() -> asStream(data)); + + return this; + } + + /** Adds the given data as a named part in this, using the {@code "application/octet-stream"} content type. */ + public MultiPartStreamer addBytes(String name, byte[] bytes) { + streams.add(() -> separator(name, "application/octet-stream")); + streams.add(() -> new ByteArrayInputStream(bytes)); + + return this; + } + + /** Adds the contents of the file at the given path as a named part in this. */ + public MultiPartStreamer addFile(String name, Path path) { + streams.add(() -> separator(name, path)); + streams.add(() -> asStream(path)); + + return this; + } + + /** + * Streams the aggregate of the current parts of this to the given request builder, and returns it. + * Modifications to this streamer after a request builder has been obtained are not reflected in that builder. + * This method can be used multiple times, to create new requests. + * The request builder's method and content should not be set after it has been obtained. + */ + public HttpRequest.Builder streamTo(HttpRequest.Builder request, Method method) { + InputStream aggregate = data(); // Get the streams now, not when the aggregate is used. + return request.setHeader("Content-Type", contentType()) + .method(method.name(), HttpRequest.BodyPublishers.ofInputStream(() -> aggregate)); + } + + /** Returns an input stream which is an aggregate of all current parts in this, plus an end marker. 
*/ + public InputStream data() { + InputStream aggregate = new SequenceInputStream(Collections.enumeration(Stream.concat(streams.stream().map(Supplier::get), + Stream.of(end())) + .collect(Collectors.toList()))); + + try { + if (aggregate.skip(2) != 2)// This should never happen, as the first stream is a ByteArrayInputStream. + throw new IllegalStateException("Failed skipping extraneous bytes."); + } + catch (IOException e) { // This should never happen, as the first stream is a ByteArrayInputStream; + throw new IllegalStateException("Failed skipping extraneous bytes.", e); + } + return new BufferedInputStream(aggregate); + } + + /** Returns the value of the {@code "Content-Type"} header to use with this. */ + public String contentType() { + return "multipart/form-data; boundary=" + boundary + "; charset: utf-8"; + } + + /** Returns the separator to put between one part and the next, when this is a string. */ + private InputStream separator(String name, String contentType) { + return asStream(disposition(name) + type(contentType)); + } + + /** Returns the separator to put between one part and the next, when this is a file. */ + private InputStream separator(String name, Path path) { + try { + String contentType = Files.probeContentType(path); + return asStream(disposition(name) + "; filename=\"" + path.getFileName() + "\"" + + type(contentType != null ? contentType : "application/octet-stream")); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** Returns the end delimiter of the request, with line breaks prepended. */ + private InputStream end() { + return asStream("\r\n--" + boundary + "--"); + } + + /** Returns the boundary and disposition header for a part, with line breaks prepended. */ + private String disposition(String name) { + return "\r\n--" + boundary + "\r\n" + + "Content-Disposition: form-data; name=\"" + name + "\""; + } + + /** Returns the content type header for a part, with line breaks pre- and appended. 
*/ + private String type(String contentType) { + return "\r\nContent-Type: " + contentType + "\r\n\r\n"; + } + + /** Returns a ByteArrayInputStream over the given string, UTF-8 encoded. */ + private static InputStream asStream(String string) { + return new ByteArrayInputStream(string.getBytes(StandardCharsets.UTF_8)); + } + + /** Returns an InputStream over the file at the given path — rethrows any IOException as UncheckedIOException. */ + private InputStream asStream(Path path) { + try { + return Files.newInputStream(path); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + +} diff --git a/hosted-api/src/main/java/ai/vespa/hosted/api/RequestSigner.java b/hosted-api/src/main/java/ai/vespa/hosted/api/RequestSigner.java new file mode 100644 index 00000000000..fb8eb1421b4 --- /dev/null +++ b/hosted-api/src/main/java/ai/vespa/hosted/api/RequestSigner.java @@ -0,0 +1,103 @@ +package ai.vespa.hosted.api; + +import com.yahoo.security.KeyUtils; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.net.http.HttpRequest; +import java.security.Key; +import java.security.PrivateKey; +import java.security.Signature; +import java.security.SignatureException; +import java.time.Clock; +import java.util.Base64; +import java.util.function.Supplier; + +import static ai.vespa.hosted.api.Signatures.sha256Digest; + +/** + * Signs HTTP request headers using a private key, for verification by the indicated public key. + * + * @author jonmv + */ +public class RequestSigner { + + private final Signature signer; + private final String keyId; + private final Clock clock; + + /** Creates a new request signer from the given PEM encoded private key, identified by the given key id. */ + public RequestSigner(String pemPrivateKey, String keyId) { + this(pemPrivateKey, keyId, Clock.systemUTC()); + } + + /** Creates a new request signer with a custom clock. 
*/ + RequestSigner(String pemPrivateKey, String keyId, Clock clock) { + this.signer = KeyUtils.createSigner(KeyUtils.fromPemEncodedPrivateKey(pemPrivateKey)); + this.keyId = keyId; + this.clock = clock; + } + + /** + * Completes, signs and returns the given request builder and data.<br> + * <br> + * The request builder's method and data are set to the given arguments, and a hash of the + * content is computed and added to a header, together with other meta data, like the URI + * of the request, the current UTC time, and the id of the public key which shall be used to + * verify this signature. + * Finally, a signature is computed from these fields, based on the private key of this, and + * added to the request as another header. + */ + public HttpRequest signed(HttpRequest.Builder request, Method method, Supplier<InputStream> data) { + try { + String timestamp = clock.instant().toString(); + String contentHash = Base64.getEncoder().encodeToString(sha256Digest(data::get)); + byte[] canonicalMessage = Signatures.canonicalMessageOf(method.name(), request.copy().build().uri(), timestamp, contentHash); + signer.update(canonicalMessage); + String signature = Base64.getEncoder().encodeToString(signer.sign()); + + request.setHeader("X-Timestamp", timestamp); + request.setHeader("X-Content-Hash", contentHash); + request.setHeader("X-Key-Id", keyId); + request.setHeader("X-Authorization", signature); + + request.method(method.name(), HttpRequest.BodyPublishers.ofInputStream(data)); + return request.build(); + } + catch (SignatureException e) { + throw new IllegalArgumentException(e); + } + } + + /** + * Completes, signs and returns the given request builder and data. + * + * This sets the Content-Type header from the given streamer, and returns + * {@code signed(request, method, streamer::data)}. 
+ */ + public HttpRequest signed(HttpRequest.Builder request, Method method, MultiPartStreamer streamer) { + request.setHeader("Content-Type", streamer.contentType()); + return signed(request, method, streamer::data); + } + + /** + * Completes, signs and returns the given request builder.<br> + * <br> + * This is simply a convenience for<br> + * {@code signed(request, method, () -> new ByteArrayInputStream(data))}. + */ + public HttpRequest signed(HttpRequest.Builder request, Method method, byte[] data) { + return signed(request, method, () -> new ByteArrayInputStream(data)); + } + + /** + * Completes, signs and returns the given request builder.<br> + * <br> + * This sets the data of the request to be empty, and returns <br> + * {@code signed(request, method, InputStream::nullInputStream)}. + */ + public HttpRequest signed(HttpRequest.Builder request, Method method) { + return signed(request, method, InputStream::nullInputStream); + } + +} diff --git a/hosted-api/src/main/java/ai/vespa/hosted/api/RequestVerifier.java b/hosted-api/src/main/java/ai/vespa/hosted/api/RequestVerifier.java new file mode 100644 index 00000000000..a46a93f624e --- /dev/null +++ b/hosted-api/src/main/java/ai/vespa/hosted/api/RequestVerifier.java @@ -0,0 +1,51 @@ +package ai.vespa.hosted.api; + +import com.yahoo.security.KeyUtils; + +import java.net.URI; +import java.security.Key; +import java.security.PublicKey; +import java.security.Signature; +import java.security.SignatureException; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.Base64; + +/** + * Verifies that signed HTTP requests match the indicated public key. 
+ * + * @author jonmv + */ +public class RequestVerifier { + + private final Signature verifier; + private final Clock clock; + + public RequestVerifier(String pemPublicKey) { + this(pemPublicKey, Clock.systemUTC()); + } + + RequestVerifier(String pemPublicKey, Clock clock) { + this.verifier = KeyUtils.createVerifier(KeyUtils.fromPemEncodedPublicKey(pemPublicKey)); + this.clock = clock; + } + + /** Returns whether the given data is a valid request now, as dictated by timestamp and the decryption key of this. */ + public boolean verify(Method method, URI requestUri, String timestamp, String contentHash, String signature) { + try { + Instant now = clock.instant(), then = Instant.parse(timestamp); + if (Duration.between(now, then).abs().compareTo(Duration.ofMinutes(5)) > 0) + return false; // Timestamp mismatch between sender and receiver of more than 5 minutes is not acceptable. + + byte[] canonicalMessage = Signatures.canonicalMessageOf(method.name(), requestUri, timestamp, contentHash); + verifier.update(canonicalMessage); + return verifier.verify(Base64.getDecoder().decode(signature)); + } + catch (RuntimeException | SignatureException e) { + return false; + } + } + +} diff --git a/hosted-api/src/main/java/ai/vespa/hosted/api/Signatures.java b/hosted-api/src/main/java/ai/vespa/hosted/api/Signatures.java new file mode 100644 index 00000000000..c93d5fc9168 --- /dev/null +++ b/hosted-api/src/main/java/ai/vespa/hosted/api/Signatures.java @@ -0,0 +1,52 @@ +package ai.vespa.hosted.api; + +import com.yahoo.security.KeyUtils; + +import java.io.InputStream; +import java.net.URI; +import java.security.DigestInputStream; +import java.security.InvalidKeyException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.PrivateKey; +import java.security.PublicKey; +import java.security.Signature; +import java.security.SignatureException; +import java.util.concurrent.Callable; + +import static 
java.nio.charset.StandardCharsets.UTF_8; + +public class Signatures { + + /** Returns the SHA-256 hash of the content in the implied input stream, consuming it in the process. */ + public static byte[] sha256Digest(Callable<InputStream> in) { + try (DigestInputStream digester = sha256Digester(in.call())) { + byte[] buffer = new byte[1 << 10]; + while (digester.read(buffer) != -1); // Consume the stream to compute the digest. + + return digester.getMessageDigest().digest(); + } + catch (IllegalStateException e) { + throw e; + } + catch (Exception e) { + throw new IllegalArgumentException(e); + } + } + + /** Wraps the given input stream in a digester which computes a SHA 256 hash of the contents read through it. */ + public static DigestInputStream sha256Digester(InputStream in) { + try { + return new DigestInputStream(in, MessageDigest.getInstance("SHA-256")); + } + catch (NoSuchAlgorithmException e) { + throw new IllegalStateException(e); + } + } + + /** Returns a canonical representation of the given request data. 
*/ + public static byte[] canonicalMessageOf(String method, URI requestUri, String timestamp, String hash) { + return (method + "\n" + requestUri.normalize() + "\n" + timestamp + "\n" + hash).getBytes(UTF_8); + } + +} diff --git a/hosted-api/src/test/java/ai/vespa/hosted/api/MultiPartStreamerTest.java b/hosted-api/src/test/java/ai/vespa/hosted/api/MultiPartStreamerTest.java new file mode 100644 index 00000000000..d94a5b3314c --- /dev/null +++ b/hosted-api/src/test/java/ai/vespa/hosted/api/MultiPartStreamerTest.java @@ -0,0 +1,70 @@ +package ai.vespa.hosted.api; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpRequest; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class MultiPartStreamerTest { + + @Rule + public TemporaryFolder tmp = new TemporaryFolder(); + + @Test + public void test() throws IOException { + Path file = tmp.newFile().toPath(); + Files.write(file, new byte[]{0x48, 0x69}); + MultiPartStreamer streamer = new MultiPartStreamer("My boundary"); + + assertEquals("--My boundary--", + new String(streamer.data().readAllBytes())); + + streamer.addData("data", "uss/enterprise", "lore") + .addJson("json", "{\"xml\":false}") + .addText("text", "Hello!") + .addFile("file", file); + + String expected = "--My boundary\r\n" + + "Content-Disposition: form-data; name=\"data\"\r\n" + + "Content-Type: uss/enterprise\r\n" + + "\r\n" + + "lore\r\n" + + "--My boundary\r\n" + + "Content-Disposition: form-data; name=\"json\"\r\n" + + "Content-Type: application/json\r\n" + + "\r\n" + + "{\"xml\":false}\r\n" + + "--My boundary\r\n" + + "Content-Disposition: form-data; name=\"text\"\r\n" + + "Content-Type: text/plain\r\n" + + "\r\n" + + "Hello!\r\n" + + "--My boundary\r\n" + + "Content-Disposition: form-data; name=\"file\"; filename=\"" + file.getFileName() + "\"\r\n" 
+ + "Content-Type: application/octet-stream\r\n" + + "\r\n" + + "Hi\r\n" + + "--My boundary--"; + + assertEquals(expected, + new String(streamer.data().readAllBytes())); + + // Verify that all data is read again for a new builder. + assertEquals(expected, + new String(streamer.data().readAllBytes())); + + assertEquals(List.of("multipart/form-data; boundary=My boundary; charset: utf-8"), + streamer.streamTo(HttpRequest.newBuilder(), Method.POST) + .uri(URI.create("https://uri/path")) + .build().headers().allValues("Content-Type")); + } + +} diff --git a/hosted-api/src/test/java/ai/vespa/hosted/api/SignaturesTest.java b/hosted-api/src/test/java/ai/vespa/hosted/api/SignaturesTest.java new file mode 100644 index 00000000000..23d29dbeb55 --- /dev/null +++ b/hosted-api/src/test/java/ai/vespa/hosted/api/SignaturesTest.java @@ -0,0 +1,158 @@ +package ai.vespa.hosted.api; + +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.net.URI; +import java.net.http.HttpRequest; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.time.Clock; +import java.time.Instant; +import java.time.ZoneOffset; + +import static ai.vespa.hosted.api.Signatures.sha256Digest; +import static ai.vespa.hosted.api.Signatures.sha256Digester; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Tests that messages can be signed and verified, and that the keys used for this can be parsed. + * + * To generate appropriate keys, run the following commands: + * <ol> + * <li>{@code openssl ecparam -name prime256v1 -genkey -noout -out private_key.pem}</li> + * <li>{@code openssl ec -pubout -in private_key.pem -out public_key.pem}</li> + * </ol> + * Step 1 generates a private key and Step 2 extracts and writes the public key to a separate file. 
+ * + * @author jonmv + */ +public class SignaturesTest { + + private static final String emPemPublicKey = "-----BEGIN PUBLIC KEY-----\n" + + "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEuKVFA8dXk43kVfYKzkUqhEY2rDT9\n" + + "z/4jKSTHwbYR8wdsOSrJGVEUPbS2nguIJ64OJH7gFnxM6sxUVj+Nm2HlXw==\n" + + "-----END PUBLIC KEY-----\n"; + + private static final String ecPemPrivateKey = "-----BEGIN EC PRIVATE KEY-----\n" + + "MHcCAQEEIJUmbIX8YFLHtpRgkwqDDE3igU9RG6JD9cYHWAZii9j7oAoGCCqGSM49\n" + + "AwEHoUQDQgAEuKVFA8dXk43kVfYKzkUqhEY2rDT9z/4jKSTHwbYR8wdsOSrJGVEU\n" + + "PbS2nguIJ64OJH7gFnxM6sxUVj+Nm2HlXw==\n" + + "-----END EC PRIVATE KEY-----\n"; + + private static final String otherEcPemPublicKey = "-----BEGIN PUBLIC KEY-----\n" + + "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEFELzPyinTfQ/sZnTmRp5E4Ve/sbE\n" + + "pDhJeqczkyFcT2PysJ5sZwm7rKPEeXDOhzTPCyRvbUqc2SGdWbKUGGa/Yw==\n" + + "-----END PUBLIC KEY-----\n"; + + private static final byte[] message = ("Hello,\n" + + "\n" + + "this is a secret message.\n" + + "\n" + + "Yours truly,\n" + + "∠( ᐛ 」∠)_").getBytes(UTF_8); + + @Test + public void testHashing() throws Exception { + byte[] hash1 = MessageDigest.getInstance("SHA-256").digest(message); + byte[] hash2 = sha256Digest(() -> new ByteArrayInputStream(message)); + DigestInputStream digester = sha256Digester(new ByteArrayInputStream(message)); + digester.readAllBytes(); + byte[] hash3 = digester.getMessageDigest().digest(); + + assertArrayEquals(hash1, hash2); + assertArrayEquals(hash1, hash3); + } + + @Test + public void testSigning() { + Clock clock = Clock.fixed(Instant.EPOCH, ZoneOffset.UTC); + RequestSigner signer = new RequestSigner(ecPemPrivateKey, "myKey", clock); + + URI requestUri = URI.create("https://host:123/path//./../more%2fpath/?yes=no"); + HttpRequest.Builder builder = HttpRequest.newBuilder(requestUri); + HttpRequest request = signer.signed(builder, Method.GET); + + // GET request with correct signature and URI as-is. 
+ RequestVerifier verifier = new RequestVerifier(emPemPublicKey, clock); + assertTrue(verifier.verify(Method.valueOf(request.method()), + request.uri(), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + // POST request with correct signature and URI normalized. + MultiPartStreamer streamer = new MultiPartStreamer().addText("message", new String(message, UTF_8)) + .addBytes("copy", message); + request = signer.signed(builder, Method.POST, streamer); + assertTrue(verifier.verify(Method.valueOf(request.method()), + request.uri().normalize(), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + // Wrong method. + assertFalse(verifier.verify(Method.PATCH, + request.uri().normalize(), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + // Wrong path. + assertFalse(verifier.verify(Method.valueOf(request.method()), + request.uri().resolve("asdf"), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + // Wrong timestamp. + assertFalse(verifier.verify(Method.valueOf(request.method()), + request.uri().normalize(), + Instant.EPOCH.plusMillis(1).toString(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + // Wrong content hash. + assertFalse(verifier.verify(Method.valueOf(request.method()), + request.uri().normalize(), + request.headers().firstValue("X-Timestamp").get(), + "Wrong/hash", + request.headers().firstValue("X-Authorization").get())); + + // Wrong signature. 
+ assertFalse(verifier.verify(Method.valueOf(request.method()), + request.uri().normalize(), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + "Wrong/signature")); + + // Key pair mismatch. + verifier = new RequestVerifier(otherEcPemPublicKey, clock); + assertFalse(verifier.verify(Method.valueOf(request.method()), + request.uri().normalize(), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + // Too old request. + verifier = new RequestVerifier(emPemPublicKey, Clock.fixed(Instant.EPOCH.plusSeconds(301), ZoneOffset.UTC)); + assertFalse(verifier.verify(Method.valueOf(request.method()), + request.uri().normalize(), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + // Too new request. + verifier = new RequestVerifier(emPemPublicKey, Clock.fixed(Instant.EPOCH.minusSeconds(301), ZoneOffset.UTC)); + assertFalse(verifier.verify(Method.valueOf(request.method()), + request.uri().normalize(), + request.headers().firstValue("X-Timestamp").get(), + request.headers().firstValue("X-Content-Hash").get(), + request.headers().firstValue("X-Authorization").get())); + + } + +} diff --git a/jrt/src/com/yahoo/jrt/Connection.java b/jrt/src/com/yahoo/jrt/Connection.java index b8ed1b32eda..6521c7cb8a5 100644 --- a/jrt/src/com/yahoo/jrt/Connection.java +++ b/jrt/src/com/yahoo/jrt/Connection.java @@ -18,9 +18,9 @@ class Connection extends Target { private static Logger log = Logger.getLogger(Connection.class.getName()); - private static final int READ_SIZE = 16384; + private static final int READ_SIZE = 32768; private static final int READ_REDO = 10; - private static final int WRITE_SIZE = 16384; + private static final int WRITE_SIZE = 32768; private static final int WRITE_REDO = 10; private 
static final int INITIAL = 0; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index ccdfbf0b402..020236138f8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -234,6 +234,7 @@ public class NodeRepository extends AbstractComponent { // - config servers // - all connections from the world on 4080 (insecure tb removed), and 4443 trustedNodes.addAll(candidates.nodeType(NodeType.config).asList()); + trustedPorts.add(443); trustedPorts.add(4080); trustedPorts.add(4443); break; @@ -138,5 +138,6 @@ <module>vsm</module> <module>yolean</module> <module>zkfacade</module> + <module>hosted-api</module> </modules> </project> diff --git a/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp index 4e4d90e6871..90953f78c40 100644 --- a/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp +++ b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp @@ -94,15 +94,13 @@ unpackFeatures(std::vector<PosEntry> &entries, uint64_t wordNum, const DocIdAndFeatures &features) { - std::vector<search::index::WordDocElementFeatures>::const_iterator - element = features._elements.begin(); - std::vector<search::index::WordDocElementWordPosFeatures>:: - const_iterator position = features._wordPositions.begin(); - uint32_t numElements = features._elements.size(); + auto element = features.elements().begin(); + auto position = features.word_positions().begin(); + uint32_t numElements = features.elements().size(); while (numElements--) { uint32_t numOccs = element->getNumOccs(); while (numOccs--) { - entries.push_back(PosEntry(features._docId, + entries.push_back(PosEntry(features.doc_id(), fieldId, element->getElementId(), 
position->getWordPos(), @@ -447,7 +445,7 @@ ShowPostingListSubApp::readPostings(const SchemaUtil::IndexIterator &index, if (r.isValid()) r.read(); while (r.isValid()) { - uint32_t docId = r._docIdAndFeatures._docId; + uint32_t docId = r._docIdAndFeatures.doc_id(); if (docId >= _minDocId && docId < _docIdLimit) { unpackFeatures(entries, index.getIndex(), r._wordNum, r._docIdAndFeatures); diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp index e33158e559f..fab2ed734cd 100644 --- a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp @@ -62,10 +62,10 @@ FieldWriterWrapper & FieldWriterWrapper::add(uint32_t docId) { DocIdAndFeatures daf; - daf._docId = docId; - daf._elements.push_back(WordDocElementFeatures(0)); - daf._elements.back().setNumOccs(1); - daf._wordPositions.push_back(WordDocElementWordPosFeatures(0)); + daf.set_doc_id(docId); + daf.elements().emplace_back(0); + daf.elements().back().setNumOccs(1); + daf.word_positions().emplace_back(0); //LOG(info, "add(%" PRIu64 ", %u)", wordNum, docId); _writer.add(daf); return *this; diff --git a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp index 49e9d613861..aca83d67a8a 100644 --- a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp @@ -41,27 +41,27 @@ Test::assertFeatures(const DocIdAndFeatures &exp, const DocIdAndFeatures &act) { // docid is not encoded as part of features - if (!EXPECT_EQUAL(exp._elements.size(), - act._elements.size())) + if (!EXPECT_EQUAL(exp.elements().size(), + act.elements().size())) return false; - for (size_t i = 0; i < exp._elements.size(); ++i) { - if (!EXPECT_EQUAL(exp._elements[i]._elementId, - act._elements[i]._elementId)) + for (size_t i = 0; i < 
exp.elements().size(); ++i) { + if (!EXPECT_EQUAL(exp.elements()[i].getElementId(), + act.elements()[i].getElementId())) return false; - if (!EXPECT_EQUAL(exp._elements[i]._numOccs, - act._elements[i]._numOccs)) + if (!EXPECT_EQUAL(exp.elements()[i].getNumOccs(), + act.elements()[i].getNumOccs())) return false; - if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight)) + if (!EXPECT_EQUAL(exp.elements()[i].getWeight(), act.elements()[i].getWeight())) return false; - if (!EXPECT_EQUAL(exp._elements[i]._elementLen, - act._elements[i]._elementLen)) + if (!EXPECT_EQUAL(exp.elements()[i].getElementLen(), + act.elements()[i].getElementLen())) return false; } - if (!EXPECT_EQUAL(exp._wordPositions.size(), act._wordPositions.size())) + if (!EXPECT_EQUAL(exp.word_positions().size(), act.word_positions().size())) return false; - for (size_t i = 0; i < exp._wordPositions.size(); ++i) { - if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos, - act._wordPositions[i]._wordPos)) return false; + for (size_t i = 0; i < exp.word_positions().size(); ++i) { + if (!EXPECT_EQUAL(exp.word_positions()[i].getWordPos(), + act.word_positions()[i].getWordPos())) return false; } return true; } @@ -73,13 +73,13 @@ getFeatures(uint32_t numOccs, uint32_t elemLen) { DocIdAndFeatures f; - f._docId = 0; - f._elements.push_back(WordDocElementFeatures(0)); - f._elements.back().setNumOccs(numOccs); - f._elements.back().setWeight(weight); - f._elements.back().setElementLen(elemLen); + f.set_doc_id(0); + f.elements().push_back(WordDocElementFeatures(0)); + f.elements().back().setNumOccs(numOccs); + f.elements().back().setWeight(weight); + f.elements().back().setElementLen(elemLen); for (uint32_t i = 0; i < numOccs; ++i) { - f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + f.word_positions().push_back(WordDocElementWordPosFeatures(i)); } return f; } diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp 
b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index e2401c1ad7c..2b9b77d32a3 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -99,10 +99,10 @@ public: if (!_firstDoc) { _ss << ","; } - _ss << "d=" << features._docId << "["; + _ss << "d=" << features.doc_id() << "["; bool first_elem = true; size_t word_pos_offset = 0; - for (const auto& elem : features._elements) { + for (const auto& elem : features.elements()) { if (!first_elem) { _ss << ","; } @@ -112,7 +112,7 @@ public: if (!first_pos) { _ss << ","; } - _ss << features._wordPositions[i + word_pos_offset].getWordPos(); + _ss << features.word_positions()[i + word_pos_offset].getWordPos(); first_pos = false; } word_pos_offset += elem.getNumOccs(); @@ -601,12 +601,10 @@ addElement(DocIdAndFeatures &f, uint32_t numOccs, int32_t weight = 1) { - f._elements.push_back(WordDocElementFeatures(f._elements.size())); - f._elements.back().setElementLen(elemLen); - f._elements.back().setWeight(weight); - f._elements.back().setNumOccs(numOccs); + f.elements().emplace_back(f.elements().size(), weight, elemLen); + f.elements().back().setNumOccs(numOccs); for (uint32_t i = 0; i < numOccs; ++i) { - f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + f.word_positions().emplace_back(i); } } @@ -679,11 +677,11 @@ TEST_F(FieldIndexCollectionTest, require_that_basic_dumping_to_index_builder_is_ b.startField(4); b.startWord("a"); DocIdAndFeatures features; - features._docId = 2; - features._elements.emplace_back(0, 10, 20); - features._elements.back().setNumOccs(2); - features._wordPositions.emplace_back(1); - features._wordPositions.emplace_back(3); + features.set_doc_id(2); + features.elements().emplace_back(0, 10, 20); + features.elements().back().setNumOccs(2); + features.word_positions().emplace_back(1); + features.word_positions().emplace_back(3); b.add_document(features); b.endWord(); b.endField(); 
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp index dfcdd991b22..83776d22fee 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp @@ -133,10 +133,8 @@ vespalib::string noFeatures = "NoFeatures"; } -template <bool bigEndian> void -FeatureDecodeContext<bigEndian>:: -readBytes(uint8_t *buf, size_t len) +DecodeContext64Base::readBytes(uint8_t *buf, size_t len) { while (len > 0) { // Ensure that buffer to read from isn't empty @@ -167,9 +165,8 @@ readBytes(uint8_t *buf, size_t len) } -template <bool bigEndian> uint32_t -FeatureDecodeContext<bigEndian>:: +DecodeContext64Base:: readHeader(vespalib::GenericHeader &header, int64_t fileSize) { size_t hhSize = vespalib::GenericHeader::getMinSize(); diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h index 67e23aabc1e..b9166f675aa 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -1136,16 +1136,18 @@ public: // File position for end of buffer minus byte address of end of buffer // minus sizeof uint64_t. Then shifted left by 3 to represent bits. 
uint64_t _fileReadBias; + search::ComprFileReadContext *_readContext; DecodeContext64Base() : search::ComprFileDecodeContext(), _valI(nullptr), - _valE(nullptr), + _valE(static_cast<const uint64_t *>(nullptr) - 1), _realValE(nullptr), _val(0), _cacheInt(0), _preRead(0), - _fileReadBias(0) + _fileReadBias(0), + _readContext(nullptr) { } @@ -1163,7 +1165,8 @@ public: _val(val), _cacheInt(cacheInt), _preRead(preRead), - _fileReadBias(0) + _fileReadBias(0), + _readContext(nullptr) { } @@ -1183,6 +1186,7 @@ public: _cacheInt = rhs._cacheInt; _preRead = rhs._preRead; _fileReadBias = rhs._fileReadBias; + _readContext = rhs._readContext; return *this; } @@ -1278,6 +1282,26 @@ public: return (val >> 1); } } + + void setReadContext(search::ComprFileReadContext *readContext) { + _readContext = readContext; + } + search::ComprFileReadContext *getReadContext() const { + return _readContext; + } + void readComprBuffer() { + _readContext->readComprBuffer(); + } + void readComprBufferIfNeeded() { + if (__builtin_expect(_valI >= _valE, false)) { + readComprBuffer(); + } + } + virtual uint64_t readBits(uint32_t length) = 0; + virtual void align(uint32_t alignment) = 0; + virtual uint64_t decode_exp_golomb(int k) = 0; + void readBytes(uint8_t *buf, size_t len); + uint32_t readHeader(vespalib::GenericHeader &header, int64_t fileSize); }; @@ -1299,7 +1323,7 @@ public: DecodeContext64(const uint64_t *compr, int bitOffset) : DecodeContext64Base(compr + 1, - nullptr, + static_cast<const uint64_t *>(nullptr) - 1, nullptr, 0, EC::bswap(*compr), @@ -1385,10 +1409,12 @@ public: }; void skipBits(int bits) override { + readComprBufferIfNeeded(); while (bits >= 64) { _val = 0; ReadBits(64, _val, _cacheInt, _preRead, _valI); bits -= 64; + readComprBufferIfNeeded(); } if (bits > 0) { if (bigEndian) { @@ -1397,6 +1423,7 @@ public: _val >>= bits; } ReadBits(bits, _val, _cacheInt, _preRead, _valI); + readComprBufferIfNeeded(); } } @@ -1436,7 +1463,7 @@ public: } uint64_t - readBits(uint32_t length) + 
readBits(uint32_t length) override { uint64_t res; if (length < 64) { @@ -1452,20 +1479,32 @@ public: _val = 0; } UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC); + readComprBufferIfNeeded(); return res; } + uint64_t decode_exp_golomb(int k) override { + uint32_t length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_val, _valI, _preRead, _cacheInt, k, EC); + readComprBufferIfNeeded(); + return val64; + } + void - align(uint32_t alignment) + align(uint32_t alignment) override { + readComprBufferIfNeeded(); uint64_t pad = (- getReadOffset()) & (alignment - 1); while (pad > 64) { (void) readBits(64); pad -= 64; + readComprBufferIfNeeded(); } if (pad > 0) { (void) readBits(pad); } + readComprBufferIfNeeded(); } /* @@ -1489,7 +1528,6 @@ template <bool bigEndian> class FeatureDecodeContext : public DecodeContext64<bigEndian> { public: - search::ComprFileReadContext *_readContext; typedef DecodeContext64<bigEndian> ParentClass; typedef index::DocIdAndFeatures DocIdAndFeatures; typedef index::PostingListParams PostingListParams; @@ -1504,68 +1542,29 @@ public: using ParentClass::getBitOffset; using ParentClass::readBits; using ParentClass::ReadBits; + using ParentClass::readComprBuffer; + using ParentClass::readComprBufferIfNeeded; + using ParentClass::readHeader; + using ParentClass::readBytes; FeatureDecodeContext() - : ParentClass(), - _readContext(nullptr) + : ParentClass() { } FeatureDecodeContext(const uint64_t *compr, int bitOffset) - : ParentClass(compr, bitOffset), - _readContext(nullptr) + : ParentClass(compr, bitOffset) { } FeatureDecodeContext(const uint64_t *compr, int bitOffset, uint64_t bitLength) - : ParentClass(compr, bitOffset, bitLength), - _readContext(nullptr) - { - } - - FeatureDecodeContext & - operator=(const FeatureDecodeContext &rhs) - { - ParentClass::operator=(rhs); - _readContext = rhs._readContext; - return *this; - } - - void - setReadContext(search::ComprFileReadContext *readContext) - { - _readContext = readContext; - } - - 
search::ComprFileReadContext * - getReadContext() const - { - return _readContext; - } - - void - readComprBuffer() - { - _readContext->readComprBuffer(); - } - - void - readComprBufferIfNeeded() + : ParentClass(compr, bitOffset, bitLength) { - if (__builtin_expect(_valI >= _valE, false)) { - readComprBuffer(); - } } - void - readBytes(uint8_t *buf, size_t len); - - virtual uint32_t - readHeader(vespalib::GenericHeader &header, int64_t fileSize); - virtual void readHeader(const vespalib::GenericHeader &header, const vespalib::string &prefix); @@ -1594,41 +1593,6 @@ public: */ virtual void getParams(PostingListParams ¶ms) const; - - void skipBits(int bits) override { - readComprBufferIfNeeded(); - while (bits >= 64) { - _val = 0; - ReadBits(64, _val, _cacheInt, _preRead, _valI); - bits -= 64; - readComprBufferIfNeeded(); - } - if (bits > 0) { - if (bigEndian) { - _val <<= bits; - } else { - _val >>= bits; - } - ReadBits(bits, _val, _cacheInt, _preRead, _valI); - readComprBufferIfNeeded(); - } - } - - void - align(uint32_t alignment) - { - readComprBufferIfNeeded(); - uint64_t pad = (- getReadOffset()) & (alignment - 1); - while (pad > 64) { - (void) readBits(64); - pad -= 64; - readComprBufferIfNeeded(); - } - if (pad > 0) { - (void) readBits(pad); - } - readComprBufferIfNeeded(); - } }; typedef FeatureDecodeContext<true> FeatureDecodeContextBE; diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp index d4f663f32cc..9f5d3cf751f 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp @@ -12,8 +12,6 @@ LOG_SETUP(".posocccompression"); using search::index::DocIdAndFeatures; -using search::index::WordDocElementFeatures; -using search::index::WordDocElementWordPosFeatures; using search::index::PostingListParams; using search::index::SchemaUtil; using search::index::Schema; @@ 
-343,8 +341,8 @@ readFeatures(search::index::DocIdAndFeatures &features) uint64_t val64; const uint64_t *valE = _valE; - features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead); - features.setRaw(true); + features.clear_features((oPreRead == 0) ? 0 : 64 - oPreRead); + features.set_has_raw_data(true); const uint64_t *rawFeatures = (oPreRead == 0) ? (oCompr - 1) : (oCompr - 2); uint64_t rawFeaturesStartBitPos = @@ -373,7 +371,7 @@ readFeatures(search::index::DocIdAndFeatures &features) } if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -394,7 +392,7 @@ readFeatures(search::index::DocIdAndFeatures &features) do { if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -410,7 +408,7 @@ readFeatures(search::index::DocIdAndFeatures &features) for (uint32_t pos = 1; pos < numPositions; ++pos) { if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -429,9 +427,9 @@ readFeatures(search::index::DocIdAndFeatures &features) _fileReadBias + (reinterpret_cast<unsigned long>(oCompr) << 3) - oPreRead; - features._bitLength = rawFeaturesEndBitPos - rawFeaturesStartBitPos; + features.set_bit_length(rawFeaturesEndBitPos - rawFeaturesStartBitPos); while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } if (__builtin_expect(oCompr >= valE, false)) { @@ -450,8 +448,8 @@ readFeatures(search::index::DocIdAndFeatures &features) uint64_t val64; const uint64_t *valE = _valE; - features.clearFeatures(); - features.setRaw(false); + 
features.clear_features(); + features.set_has_raw_data(false); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; uint32_t numElements = 1; @@ -470,14 +468,13 @@ readFeatures(search::index::DocIdAndFeatures &features) EC); elementId += static_cast<uint32_t>(val64); } - features._elements. - push_back(WordDocElementFeatures(elementId)); + features.elements().emplace_back(elementId); if (fieldParams._hasElementWeights) { UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_ELEMENTWEIGHT, EC); int32_t elementWeight = this->convertToSigned(val64); - features._elements.back().setWeight(elementWeight); + features.elements().back().setWeight(elementWeight); } if (__builtin_expect(oCompr >= valE, false)) { UC64_DECODECONTEXT_STORE(o, _); @@ -489,7 +486,7 @@ readFeatures(search::index::DocIdAndFeatures &features) K_VALUE_POSOCC_ELEMENTLEN, EC); uint32_t elementLen = static_cast<uint32_t>(val64) + 1; - features._elements.back().setElementLen(elementLen); + features.elements().back().setElementLen(elementLen); UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_NUMPOSITIONS, EC); @@ -507,9 +504,8 @@ readFeatures(search::index::DocIdAndFeatures &features) K_VALUE_POSOCC_FIRST_WORDPOS, EC); wordPos = static_cast<uint32_t>(val64); - features._elements.back().incNumOccs(); - features._wordPositions.push_back( - WordDocElementWordPosFeatures(wordPos)); + features.elements().back().incNumOccs(); + features.word_positions().emplace_back(wordPos); } while (0); for (uint32_t pos = 1; pos < numPositions; ++pos) { if (__builtin_expect(oCompr >= valE, false)) { @@ -522,9 +518,8 @@ readFeatures(search::index::DocIdAndFeatures &features) K_VALUE_POSOCC_DELTA_WORDPOS, EC); wordPos += 1 + static_cast<uint32_t>(val64); - features._elements.back().incNumOccs(); - features._wordPositions.push_back( - WordDocElementWordPosFeatures(wordPos)); + features.elements().back().incNumOccs(); + features.word_positions().emplace_back(wordPos); } } UC64_DECODECONTEXT_STORE(o, _); @@ -732,23 
+727,19 @@ void EG2PosOccEncodeContext<bigEndian>:: writeFeatures(const search::index::DocIdAndFeatures &features) { - if (features.getRaw()) { - writeBits(&features._blob[0], - features._bitOffset, features._bitLength); + if (features.has_raw_data()) { + writeBits(features.blob().data(), + features.bit_offset(), features.bit_length()); return; } - typedef WordDocElementFeatures Elements; - typedef WordDocElementWordPosFeatures Positions; - std::vector<Elements>::const_iterator element = features._elements.begin(); - - std::vector<Positions>::const_iterator position = - features._wordPositions.begin(); + auto element = features.elements().begin(); + auto position = features.word_positions().begin(); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; - uint32_t numElements = features._elements.size(); + uint32_t numElements = features.elements().size(); if (fieldParams._hasElements) { assert(numElements > 0u); encodeExpGolomb(numElements - 1, @@ -854,8 +845,8 @@ readFeatures(search::index::DocIdAndFeatures &features) uint64_t val64; const uint64_t *valE = _valE; - features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead); - features.setRaw(true); + features.clear_features((oPreRead == 0) ? 0 : 64 - oPreRead); + features.set_has_raw_data(true); const uint64_t *rawFeatures = (oPreRead == 0) ? 
(oCompr - 1) : (oCompr - 2); uint64_t rawFeaturesStartBitPos = @@ -885,7 +876,7 @@ readFeatures(search::index::DocIdAndFeatures &features) } if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -910,7 +901,7 @@ readFeatures(search::index::DocIdAndFeatures &features) for (uint32_t pos = 0; pos < numPositions; ++pos) { if (__builtin_expect(oCompr >= valE, false)) { while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } UC64_DECODECONTEXT_STORE(o, _); @@ -929,9 +920,9 @@ readFeatures(search::index::DocIdAndFeatures &features) _fileReadBias + (reinterpret_cast<unsigned long>(oCompr) << 3) - oPreRead; - features._bitLength = rawFeaturesEndBitPos - rawFeaturesStartBitPos; + features.set_bit_length(rawFeaturesEndBitPos - rawFeaturesStartBitPos); while (rawFeatures < oCompr) { - features._blob.push_back(*rawFeatures); + features.blob().push_back(*rawFeatures); ++rawFeatures; } if (__builtin_expect(oCompr >= valE, false)) { @@ -950,8 +941,8 @@ readFeatures(search::index::DocIdAndFeatures &features) uint64_t val64; const uint64_t *valE = _valE; - features.clearFeatures(); - features.setRaw(false); + features.clear_features(); + features.set_has_raw_data(false); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; @@ -972,14 +963,13 @@ readFeatures(search::index::DocIdAndFeatures &features) EC); elementId += static_cast<uint32_t>(val64); } - features._elements. 
- push_back(WordDocElementFeatures(elementId)); + features.elements().emplace_back(elementId); if (fieldParams._hasElementWeights) { UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_ELEMENTWEIGHT, EC); int32_t elementWeight = this->convertToSigned(val64); - features._elements.back().setWeight(elementWeight); + features.elements().back().setWeight(elementWeight); } if (__builtin_expect(oCompr >= valE, false)) { UC64_DECODECONTEXT_STORE(o, _); @@ -991,13 +981,13 @@ readFeatures(search::index::DocIdAndFeatures &features) elementLenK, EC); uint32_t elementLen = static_cast<uint32_t>(val64) + 1; - features._elements.back().setElementLen(elementLen); + features.elements().back().setElementLen(elementLen); UC64_DECODEEXPGOLOMB_SMALL_NS(o, K_VALUE_POSOCC_NUMPOSITIONS, EC); uint32_t numPositions = static_cast<uint32_t>(val64) + 1; - features._bitLength = numPositions * 64; + features.set_bit_length(numPositions * 64); uint32_t wordPosK = EGPosOccEncodeContext<bigEndian>:: calcWordPosK(numPositions, elementLen); @@ -1014,9 +1004,8 @@ readFeatures(search::index::DocIdAndFeatures &features) wordPosK, EC); wordPos += 1 + static_cast<uint32_t>(val64); - features._elements.back().incNumOccs(); - features._wordPositions.push_back( - WordDocElementWordPosFeatures(wordPos)); + features.elements().back().incNumOccs(); + features.word_positions().emplace_back(wordPos); } } UC64_DECODECONTEXT_STORE(o, _); @@ -1227,23 +1216,19 @@ void EGPosOccEncodeContext<bigEndian>:: writeFeatures(const search::index::DocIdAndFeatures &features) { - if (features.getRaw()) { - writeBits(&features._blob[0], - features._bitOffset, features._bitLength); + if (features.has_raw_data()) { + writeBits(features.blob().data(), + features.bit_offset(), features.bit_length()); return; } - typedef WordDocElementFeatures Elements; - typedef WordDocElementWordPosFeatures Positions; - - std::vector<Elements>::const_iterator element = features._elements.begin(); - std::vector<Positions>::const_iterator position = - 
features._wordPositions.begin(); + auto element = features.elements().begin(); + auto position = features.word_positions().begin(); const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; uint32_t elementLenK = calcElementLenK(fieldParams._avgElemLen); - uint32_t numElements = features._elements.size(); + uint32_t numElements = features.elements().size(); if (fieldParams._hasElements) { assert(numElements > 0u); encodeExpGolomb(numElements - 1, diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h index a5d46045ec5..d500dacd7d4 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h @@ -48,9 +48,9 @@ public: assert(elementLen == _elements.back().getElementLen()); } assert(_elements.back().getNumOccs() == 0 || - wordPos > _wordPositions.back().getWordPos()); + wordPos > _word_positions.back().getWordPos()); _elements.back().incNumOccs(); - _wordPositions.emplace_back(wordPos); + _word_positions.emplace_back(wordPos); } }; diff --git a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt index 2fea4f2bab7..ba608467c8a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt @@ -20,6 +20,7 @@ vespa_add_library(searchlib_diskindex OBJECT wordnummapper.cpp zc4_posting_header.cpp zc4_posting_reader.cpp + zc4_posting_reader_base.cpp zc4_posting_writer.cpp zc4_posting_writer_base.cpp zcbuf.cpp diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp index 96b106a15da..a41f0412294 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp @@ -63,7 +63,7 @@ void FieldReader::readDocIdAndFeatures() { 
_oldposoccfile->readDocIdAndFeatures(_docIdAndFeatures); - _docIdAndFeatures._docId = _docIdMapper.mapDocId(_docIdAndFeatures._docId); + _docIdAndFeatures.set_doc_id(_docIdMapper.mapDocId(_docIdAndFeatures.doc_id())); } @@ -75,13 +75,13 @@ FieldReader::read() readCounts(); if (_wordNum == noWordNumHigh()) { assert(_residue == 0); - _docIdAndFeatures._docId = NO_DOC; + _docIdAndFeatures.set_doc_id(NO_DOC); return; } } --_residue; readDocIdAndFeatures(); - if (_docIdAndFeatures._docId != NO_DOC) { + if (_docIdAndFeatures.doc_id() != NO_DOC) { return; } } @@ -267,26 +267,26 @@ FieldReaderStripInfo::read() if (_wordNum == noWordNumHigh()) { return; } - assert(!features.getRaw()); - uint32_t numElements = features._elements.size(); + assert(!features.has_raw_data()); + uint32_t numElements = features.elements().size(); assert(numElements > 0); std::vector<Element>::iterator element = - features._elements.begin(); + features.elements().begin(); if (_hasElements) { if (!_hasElementWeights) { for (uint32_t elementDone = 0; elementDone < numElements; ++elementDone, ++element) { element->setWeight(1); } - assert(element == features._elements.end()); + assert(element == features.elements().end()); } } else { if (element->getElementId() != 0) { continue; // Drop this entry, try to read new entry } element->setWeight(1); - features._wordPositions.resize(element->getNumOccs()); + features.word_positions().resize(element->getNumOccs()); if (numElements > 1) { - features._elements.resize(1); + features.elements().resize(1); } } break; diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.h b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h index a73ffa149a9..50748d037c0 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h @@ -85,7 +85,7 @@ public: bool operator<(const FieldReader &rhs) const { return _wordNum < rhs._wordNum || (_wordNum == rhs._wordNum && - _docIdAndFeatures._docId < 
rhs._docIdAndFeatures._docId); + _docIdAndFeatures.doc_id() < rhs._docIdAndFeatures.doc_id()); } virtual void setup(const WordNumMapping &wordNumMapping, const DocIdMapping &docIdMapping); diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h index 1e9afb717e8..e5aa9788071 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h @@ -58,11 +58,11 @@ public: void newWord(vespalib::stringref word); void add(const DocIdAndFeatures &features) { - assert(features._docId < _docIdLimit); - assert(features._docId > _prevDocId); + assert(features.doc_id() < _docIdLimit); + assert(features.doc_id() > _prevDocId); _posoccfile->writeDocIdAndFeatures(features); - _bvc.add(features._docId); - _prevDocId = features._docId; + _bvc.add(features.doc_id()); + _prevDocId = features.doc_id(); } uint64_t getSparseWordNum() const { return _wordNum; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp index 5288d054ef0..2149a44f5ce 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp @@ -20,35 +20,13 @@ Zc4PostingHeader::Zc4PostingHeader() { } -template <bool bigEndian> void Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams ¶ms) { - using EC = bitcompression::FeatureEncodeContext<bigEndian>; - UC64_DECODECONTEXT_CONSTRUCTOR(o, decode_context._); - uint32_t length; - uint64_t val64; - - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); - _num_docs = static_cast<uint32_t>(val64) + 1; - bool has_more = false; - if (__builtin_expect(_num_docs >= params._min_chunk_docs, false)) { - if (bigEndian) { - has_more = static_cast<int64_t>(oVal) < 0; - oVal <<= 1; - length = 1; - } else { - has_more = (oVal & 1) != 0; - oVal >>= 
1; - length = 1; - } - UC64_READBITS_NS(o, EC); - } - if (params._dynamic_k) { - _doc_id_k = EC::calcDocIdK((_has_more || has_more) ? 1 : _num_docs, params._doc_id_limit); - } else { - _doc_id_k = K_VALUE_ZCPOSTING_LASTDOCID; - } + using EC = bitcompression::FeatureEncodeContext<true>; + _num_docs = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_NUMDOCS) + 1; + bool has_more = (_num_docs >= params._min_chunk_docs) ? (decode_context.readBits(1) != 0) : false; + _doc_id_k = params._dynamic_k ? EC::calcDocIdK((_has_more || has_more) ? 1 : _num_docs, params._doc_id_limit) : K_VALUE_ZCPOSTING_LASTDOCID; if (_num_docs < params._min_skip_docs && !_has_more) { _doc_ids_size = 0; _l1_skip_size = 0; @@ -58,47 +36,16 @@ Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, cons _features_size = 0; _last_doc_id = 0; } else { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC); - _doc_ids_size = val64 + 1; - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC); - _l1_skip_size = val64; - if (_l1_skip_size != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC); - _l2_skip_size = val64; - } - if (_l2_skip_size != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC); - _l3_skip_size = val64; - } - if (_l3_skip_size != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC); - _l4_skip_size = val64; - } - if (params._encode_features) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC); - _features_size = val64; - } else { - _features_size = 0; - } - UC64_DECODEEXPGOLOMB_NS(o, _doc_id_k, EC); - _last_doc_id = params._doc_id_limit - 1 - val64; - uint64_t bytePad = oPreRead & 7; - if (bytePad > 0) { - length = bytePad; - UC64_READBITS_NS(o, EC); - } + _doc_ids_size = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_DOCIDSSIZE) + 1; + _l1_skip_size = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L1SKIPSIZE); + _l2_skip_size = (_l1_skip_size != 0) ? 
decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L2SKIPSIZE) : 0; + _l3_skip_size = (_l2_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L3SKIPSIZE) : 0; + _l4_skip_size = (_l3_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L4SKIPSIZE) : 0; + _features_size = params._encode_features ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_FEATURESSIZE) : 0; + _last_doc_id = params._doc_id_limit - 1 - decode_context.decode_exp_golomb(_doc_id_k); + decode_context.align(8); } - UC64_DECODECONTEXT_STORE(o, decode_context._); _has_more = has_more; } -template -void -Zc4PostingHeader::read<false>(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams ¶ms); - -template -void -Zc4PostingHeader::read<true>(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams ¶ms); - - } diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h index 7382f59d176..d4032864e16 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h @@ -27,7 +27,6 @@ struct Zc4PostingHeader { Zc4PostingHeader(); - template <bool bigEndian> void read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams ¶ms); }; diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp index 30cef1dc258..a09c26d7985 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp @@ -1,6 +1,7 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "zc4_posting_reader.h" +#include "zc4_posting_header.h" #include <vespa/searchlib/index/docidandfeatures.h> namespace search::diskindex { @@ -12,41 +13,8 @@ using bitcompression::FeatureEncodeContext; template <bool bigEndian> Zc4PostingReader<bigEndian>::Zc4PostingReader(bool dynamic_k) - : _decodeContext(nullptr), - _docIdK(K_VALUE_ZCPOSTING_DELTA_DOCID), - _prevDocId(0), - _numDocs(0), - _readContext(sizeof(uint64_t)), - _has_more(false), - _posting_params(64, 1 << 30, 10000000, dynamic_k, true), - _lastDocId(0), - _zcDocIds(), - _l1Skip(), - _l2Skip(), - _l3Skip(), - _l4Skip(), - _chunkNo(0), - _l1SkipDocId(0), - _l1SkipDocIdPos(0), - _l1SkipFeaturesPos(0), - _l2SkipDocId(0), - _l2SkipDocIdPos(0), - _l2SkipL1SkipPos(0), - _l2SkipFeaturesPos(0), - _l3SkipDocId(0), - _l3SkipDocIdPos(0), - _l3SkipL1SkipPos(0), - _l3SkipL2SkipPos(0), - _l3SkipFeaturesPos(0), - _l4SkipDocId(0), - _l4SkipDocIdPos(0), - _l4SkipL1SkipPos(0), - _l4SkipL2SkipPos(0), - _l4SkipL3SkipPos(0), - _l4SkipFeaturesPos(0), - _featuresSize(0), - _counts(), - _residue(0) + : Zc4PostingReaderBase(dynamic_k), + _decodeContext(nullptr) { } @@ -57,370 +25,50 @@ Zc4PostingReader<bigEndian>::~Zc4PostingReader() template <bool bigEndian> void -Zc4PostingReader<bigEndian>::read_common_word_doc_id_and_features(DocIdAndFeatures &features) -{ - if ((_zcDocIds._valI >= _zcDocIds._valE) && _has_more) { - read_word_start(); // Read start of next chunk - } - // Split docid & features. 
- assert(_zcDocIds._valI < _zcDocIds._valE); - uint32_t docIdPos = _zcDocIds.pos(); - uint32_t docId = _prevDocId + 1 + _zcDocIds.decode(); - features._docId = docId; - _prevDocId = docId; - assert(docId <= _lastDocId); - if (docId > _l1SkipDocId) { - _l1SkipDocIdPos += _l1Skip.decode() + 1; - assert(docIdPos == _l1SkipDocIdPos); - uint64_t featuresPos = _decodeContext->getReadOffset(); - if (_posting_params._encode_features) { - _l1SkipFeaturesPos += _l1Skip.decode() + 1; - assert(featuresPos == _l1SkipFeaturesPos); - } - (void) featuresPos; - if (docId > _l2SkipDocId) { - _l2SkipDocIdPos += _l2Skip.decode() + 1; - assert(docIdPos == _l2SkipDocIdPos); - if (_posting_params._encode_features) { - _l2SkipFeaturesPos += _l2Skip.decode() + 1; - assert(featuresPos == _l2SkipFeaturesPos); - } - _l2SkipL1SkipPos += _l2Skip.decode() + 1; - assert(_l1Skip.pos() == _l2SkipL1SkipPos); - if (docId > _l3SkipDocId) { - _l3SkipDocIdPos += _l3Skip.decode() + 1; - assert(docIdPos == _l3SkipDocIdPos); - if (_posting_params._encode_features) { - _l3SkipFeaturesPos += _l3Skip.decode() + 1; - assert(featuresPos == _l3SkipFeaturesPos); - } - _l3SkipL1SkipPos += _l3Skip.decode() + 1; - assert(_l1Skip.pos() == _l3SkipL1SkipPos); - _l3SkipL2SkipPos += _l3Skip.decode() + 1; - assert(_l2Skip.pos() == _l3SkipL2SkipPos); - if (docId > _l4SkipDocId) { - _l4SkipDocIdPos += _l4Skip.decode() + 1; - assert(docIdPos == _l4SkipDocIdPos); - (void) docIdPos; - if (_posting_params._encode_features) { - _l4SkipFeaturesPos += _l4Skip.decode() + 1; - assert(featuresPos == _l4SkipFeaturesPos); - } - _l4SkipL1SkipPos += _l4Skip.decode() + 1; - assert(_l1Skip.pos() == _l4SkipL1SkipPos); - _l4SkipL2SkipPos += _l4Skip.decode() + 1; - assert(_l2Skip.pos() == _l4SkipL2SkipPos); - _l4SkipL3SkipPos += _l4Skip.decode() + 1; - assert(_l3Skip.pos() == _l4SkipL3SkipPos); - _l4SkipDocId += _l4Skip.decode() + 1; - assert(_l4SkipDocId <= _lastDocId); - assert(_l4SkipDocId >= docId); - } - _l3SkipDocId += _l3Skip.decode() 
+ 1; - assert(_l3SkipDocId <= _lastDocId); - assert(_l3SkipDocId <= _l4SkipDocId); - assert(_l3SkipDocId >= docId); - } - _l2SkipDocId += _l2Skip.decode() + 1; - assert(_l2SkipDocId <= _lastDocId); - assert(_l2SkipDocId <= _l4SkipDocId); - assert(_l2SkipDocId <= _l3SkipDocId); - assert(_l2SkipDocId >= docId); - } - _l1SkipDocId += _l1Skip.decode() + 1; - assert(_l1SkipDocId <= _lastDocId); - assert(_l1SkipDocId <= _l4SkipDocId); - assert(_l1SkipDocId <= _l3SkipDocId); - assert(_l1SkipDocId <= _l2SkipDocId); - assert(_l1SkipDocId >= docId); - } - if (docId < _lastDocId) { - // Assert more space available when not yet at last docid - assert(_zcDocIds._valI < _zcDocIds._valE); - } else { - // Assert that space has been used when at last docid - assert(_zcDocIds._valI == _zcDocIds._valE); - // Assert that we've read to end of skip info - assert(_l1SkipDocId == _lastDocId); - assert(_l2SkipDocId == _lastDocId); - assert(_l3SkipDocId == _lastDocId); - assert(_l4SkipDocId == _lastDocId); - if (!_has_more) { - _chunkNo = 0; - } - } - if (_posting_params._encode_features) { - _decodeContext->readFeatures(features); - } - --_residue; -} - -template <bool bigEndian> -void Zc4PostingReader<bigEndian>::read_doc_id_and_features(DocIdAndFeatures &features) { if (_residue == 0 && !_has_more) { - if (_residue == 0) { - // Don't read past end of posting list. - features.clear(static_cast<uint32_t>(-1)); - return; - } - } - if (_lastDocId > 0) { - read_common_word_doc_id_and_features(features); + // Don't read past end of posting list. 
+ features.clear(static_cast<uint32_t>(-1)); return; } - // Interleaves docid & features - using EC = FeatureEncodeContext<bigEndian>; - DecodeContext &d = *_decodeContext; - uint32_t length; - uint64_t val64; - UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); - - UC64_DECODEEXPGOLOMB_SMALL_NS(o, _docIdK, EC); - uint32_t docId = _prevDocId + 1 + val64; - features._docId = docId; - _prevDocId = docId; - UC64_DECODECONTEXT_STORE(o, d._); - if (__builtin_expect(oCompr >= d._valE, false)) { - _readContext.readComprBuffer(); - } - if (_posting_params._encode_features) { - _decodeContext->readFeatures(features); - } - --_residue; -} - -template <bool bigEndian> -void -Zc4PostingReader<bigEndian>::read_word_start_with_skip() -{ - using EC = FeatureEncodeContext<bigEndian>; - DecodeContext &d = *_decodeContext; - UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); - uint32_t length; - uint64_t val64; - const uint64_t *valE = d._valE; - - if (_has_more) { - ++_chunkNo; - } else { - _chunkNo = 0; - } - assert(_numDocs >= _posting_params._min_skip_docs || _has_more); - bool has_more = false; - if (__builtin_expect(_numDocs >= _posting_params._min_chunk_docs, false)) { - if (bigEndian) { - has_more = static_cast<int64_t>(oVal) < 0; - oVal <<= 1; - } else { - has_more = (oVal & 1) != 0; - oVal >>= 1; - } - length = 1; - UC64_READBITS_NS(o, EC); - } - if (_posting_params._dynamic_k) { - _docIdK = EC::calcDocIdK((_has_more || has_more) ? 
1 : _numDocs, - _posting_params._doc_id_limit); - } - if (_has_more || has_more) { - assert(has_more == (_chunkNo + 1 < _counts._segments.size())); - assert(_numDocs == _counts._segments[_chunkNo]._numDocs); - if (has_more) { - assert(_numDocs >= _posting_params._min_skip_docs); - assert(_numDocs >= _posting_params._min_chunk_docs); - } - } else { - assert(_numDocs >= _posting_params._min_skip_docs); - assert(_numDocs == _counts._numDocs); - } - if (__builtin_expect(oCompr >= valE, false)) { - UC64_DECODECONTEXT_STORE(o, d._); - _readContext.readComprBuffer(); - valE = d._valE; - UC64_DECODECONTEXT_LOAD(o, d._); - } - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC); - uint32_t docIdsSize = val64 + 1; - UC64_DECODEEXPGOLOMB_NS(o, - K_VALUE_ZCPOSTING_L1SKIPSIZE, - EC); - uint32_t l1SkipSize = val64; - if (__builtin_expect(oCompr >= valE, false)) { - UC64_DECODECONTEXT_STORE(o, d._); - _readContext.readComprBuffer(); - valE = d._valE; - UC64_DECODECONTEXT_LOAD(o, d._); - } - uint32_t l2SkipSize = 0; - if (l1SkipSize != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC); - l2SkipSize = val64; - } - uint32_t l3SkipSize = 0; - if (l2SkipSize != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC); - l3SkipSize = val64; - } - if (__builtin_expect(oCompr >= valE, false)) { - UC64_DECODECONTEXT_STORE(o, d._); - _readContext.readComprBuffer(); - valE = d._valE; - UC64_DECODECONTEXT_LOAD(o, d._); - } - uint32_t l4SkipSize = 0; - if (l3SkipSize != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC); - l4SkipSize = val64; - } - if (_posting_params._encode_features) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC); - _featuresSize = val64; - } - if (__builtin_expect(oCompr >= valE, false)) { - UC64_DECODECONTEXT_STORE(o, d._); - _readContext.readComprBuffer(); - valE = d._valE; - UC64_DECODECONTEXT_LOAD(o, d._); - } - if (_posting_params._dynamic_k) { - UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); + if 
(_last_doc_id > 0) { + read_common_word_doc_id(*_decodeContext); } else { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_LASTDOCID, EC); - } - _lastDocId = _posting_params._doc_id_limit - 1 - val64; - if (_has_more || has_more) { - assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc); - } - - if (__builtin_expect(oCompr >= valE, false)) { + // Interleaves docid & features + using EC = FeatureEncodeContext<bigEndian>; + DecodeContext &d = *_decodeContext; + uint32_t length; + uint64_t val64; + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + + UC64_DECODEEXPGOLOMB_SMALL_NS(o, _doc_id_k, EC); + uint32_t docId = _prev_doc_id + 1 + val64; + _prev_doc_id = docId; UC64_DECODECONTEXT_STORE(o, d._); - _readContext.readComprBuffer(); - valE = d._valE; - UC64_DECODECONTEXT_LOAD(o, d._); - } - uint64_t bytePad = oPreRead & 7; - if (bytePad > 0) { - length = bytePad; - if (bigEndian) { - oVal <<= length; - } else { - oVal >>= length; + if (__builtin_expect(oCompr >= d._valE, false)) { + _readContext.readComprBuffer(); } - UC64_READBITS_NS(o, EC); - } - UC64_DECODECONTEXT_STORE(o, d._); - if (__builtin_expect(oCompr >= valE, false)) { - _readContext.readComprBuffer(); - } - _zcDocIds.clearReserve(docIdsSize); - _l1Skip.clearReserve(l1SkipSize); - _l2Skip.clearReserve(l2SkipSize); - _l3Skip.clearReserve(l3SkipSize); - _l4Skip.clearReserve(l4SkipSize); - _decodeContext->readBytes(_zcDocIds._valI, docIdsSize); - _zcDocIds._valE = _zcDocIds._valI + docIdsSize; - if (l1SkipSize > 0) { - _decodeContext->readBytes(_l1Skip._valI, l1SkipSize); - } - _l1Skip._valE = _l1Skip._valI + l1SkipSize; - if (l2SkipSize > 0) { - _decodeContext->readBytes(_l2Skip._valI, l2SkipSize); - } - _l2Skip._valE = _l2Skip._valI + l2SkipSize; - if (l3SkipSize > 0) { - _decodeContext->readBytes(_l3Skip._valI, l3SkipSize); } - _l3Skip._valE = _l3Skip._valI + l3SkipSize; - if (l4SkipSize > 0) { - _decodeContext->readBytes(_l4Skip._valI, l4SkipSize); - } - _l4Skip._valE = _l4Skip._valI + l4SkipSize; - - if 
(l1SkipSize > 0) { - _l1SkipDocId = _l1Skip.decode() + 1 + _prevDocId; - } else { - _l1SkipDocId = _lastDocId; - } - if (l2SkipSize > 0) { - _l2SkipDocId = _l2Skip.decode() + 1 + _prevDocId; - } else { - _l2SkipDocId = _lastDocId; - } - if (l3SkipSize > 0) { - _l3SkipDocId = _l3Skip.decode() + 1 + _prevDocId; - } else { - _l3SkipDocId = _lastDocId; - } - if (l4SkipSize > 0) { - _l4SkipDocId = _l4Skip.decode() + 1 + _prevDocId; - } else { - _l4SkipDocId = _lastDocId; + features.set_doc_id(_prev_doc_id); + if (_posting_params._encode_features) { + _decodeContext->readFeatures(features); } - _l1SkipDocIdPos = 0; - _l1SkipFeaturesPos = _decodeContext->getReadOffset(); - _l2SkipDocIdPos = 0; - _l2SkipL1SkipPos = 0; - _l2SkipFeaturesPos = _decodeContext->getReadOffset(); - _l3SkipDocIdPos = 0; - _l3SkipL1SkipPos = 0; - _l3SkipL2SkipPos = 0; - _l3SkipFeaturesPos = _decodeContext->getReadOffset(); - _l4SkipDocIdPos = 0; - _l4SkipL1SkipPos = 0; - _l4SkipL2SkipPos = 0; - _l4SkipL3SkipPos = 0; - _l4SkipFeaturesPos = _decodeContext->getReadOffset(); - _has_more = has_more; - // Decode context is now positioned at start of features + --_residue; } template <bool bigEndian> void Zc4PostingReader<bigEndian>::read_word_start() { - using EC = FeatureEncodeContext<bigEndian>; - UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); - uint32_t length; - uint64_t val64; - const uint64_t *valE = _decodeContext->_valE; - - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); - UC64_DECODECONTEXT_STORE(o, _decodeContext->_); - if (oCompr >= valE) { - _readContext.readComprBuffer(); - } - _numDocs = static_cast<uint32_t>(val64) + 1; - _residue = _numDocs; - _prevDocId = _has_more ? 
_lastDocId : 0u; - assert(_numDocs <= _counts._numDocs); - assert(_numDocs == _counts._numDocs || - _numDocs >= _posting_params._min_chunk_docs || - _has_more); - - if (_numDocs >= _posting_params._min_skip_docs || _has_more) { - read_word_start_with_skip(); - // Decode context is not positioned at start of features - } else { - if (_posting_params._dynamic_k) { - _docIdK = EC::calcDocIdK(_numDocs, _posting_params._doc_id_limit); - } - _lastDocId = 0u; - // Decode context is not positioned at start of docids & features - } + Zc4PostingReaderBase::read_word_start(*_decodeContext); } template <bool bigEndian> void Zc4PostingReader<bigEndian>::set_counts(const PostingListCounts &counts) { - assert(!_has_more && _residue == 0); // Previous words must have been read. - _counts = counts; - assert((_counts._numDocs == 0) == (_counts._bitLength == 0)); - if (_counts._numDocs > 0) { - read_word_start(); - } + Zc4PostingReaderBase::set_counts(*_decodeContext, counts); } template <bool bigEndian> diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h index d8161da15d5..59a660407b4 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h @@ -2,14 +2,7 @@ #pragma once -#include "zc4_posting_writer.h" -#include <vespa/searchlib/index/postinglistfile.h> -#include <vespa/fastos/file.h> -#include "zc4_posting_params.h" - -namespace search::index { - class PostingListCountFileSeqRead; -} +#include "zc4_posting_reader_base.h" namespace search::diskindex { @@ -23,57 +16,13 @@ namespace search::diskindex { * interleaved. 
*/ template <bool bigEndian> -class Zc4PostingReader +class Zc4PostingReader : public Zc4PostingReaderBase { protected: using DecodeContext = bitcompression::FeatureDecodeContext<bigEndian>; DecodeContext *_decodeContext; - uint32_t _docIdK; - uint32_t _prevDocId; // Previous document id - uint32_t _numDocs; // Documents in chunk or word - search::ComprFileReadContext _readContext; - bool _has_more; - Zc4PostingParams _posting_params; - uint32_t _lastDocId; // last document in chunk or word - - ZcBuf _zcDocIds; // Document id deltas - ZcBuf _l1Skip; // L1 skip info - ZcBuf _l2Skip; // L2 skip info - ZcBuf _l3Skip; // L3 skip info - ZcBuf _l4Skip; // L4 skip info - - uint64_t _numWords; // Number of words in file - uint32_t _chunkNo; // Chunk number - - // Variables for validating skip information while reading - uint32_t _l1SkipDocId; - uint32_t _l1SkipDocIdPos; - uint64_t _l1SkipFeaturesPos; - uint32_t _l2SkipDocId; - uint32_t _l2SkipDocIdPos; - uint32_t _l2SkipL1SkipPos; - uint64_t _l2SkipFeaturesPos; - uint32_t _l3SkipDocId; - uint32_t _l3SkipDocIdPos; - uint32_t _l3SkipL1SkipPos; - uint32_t _l3SkipL2SkipPos; - uint64_t _l3SkipFeaturesPos; - uint32_t _l4SkipDocId; - uint32_t _l4SkipDocIdPos; - uint32_t _l4SkipL1SkipPos; - uint32_t _l4SkipL2SkipPos; - uint32_t _l4SkipL3SkipPos; - uint64_t _l4SkipFeaturesPos; - - // Variable for validating chunk information while reading - uint64_t _featuresSize; - index::PostingListCounts _counts; - - uint32_t _residue; // Number of unread documents after word header - void read_common_word_doc_id_and_features(index::DocIdAndFeatures &features); - void read_word_start_with_skip(); void read_word_start(); public: Zc4PostingReader(bool dynamic_k); @@ -86,8 +35,6 @@ public: void set_counts(const index::PostingListCounts &counts); void set_decode_features(DecodeContext *decode_features); DecodeContext &get_decode_features() const { return *_decodeContext; } - ComprFileReadContext &get_read_context() { return _readContext; } - 
Zc4PostingParams &get_posting_params() { return _posting_params; } }; extern template class Zc4PostingReader<false>; diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp new file mode 100644 index 00000000000..18963e22404 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp @@ -0,0 +1,275 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "zc4_posting_reader_base.h" +#include "zc4_posting_header.h" +#include <vespa/searchlib/index/docidandfeatures.h> + +namespace search::diskindex { + +using index::PostingListCounts; +using index::DocIdAndFeatures; +using bitcompression::FeatureEncodeContext; +using bitcompression::DecodeContext64Base; + + +Zc4PostingReaderBase::Zc4PostingReaderBase(bool dynamic_k) + : _doc_id_k(K_VALUE_ZCPOSTING_DELTA_DOCID), + _prev_doc_id(0), + _num_docs(0), + _readContext(sizeof(uint64_t)), + _has_more(false), + _posting_params(64, 1 << 30, 10000000, dynamic_k, true), + _last_doc_id(0), + _zcDocIds(), + _l1Skip(), + _l2Skip(), + _l3Skip(), + _l4Skip(), + _chunkNo(0), + _l1SkipDocId(0), + _l1SkipDocIdPos(0), + _l1SkipFeaturesPos(0), + _l2SkipDocId(0), + _l2SkipDocIdPos(0), + _l2SkipL1SkipPos(0), + _l2SkipFeaturesPos(0), + _l3SkipDocId(0), + _l3SkipDocIdPos(0), + _l3SkipL1SkipPos(0), + _l3SkipL2SkipPos(0), + _l3SkipFeaturesPos(0), + _l4SkipDocId(0), + _l4SkipDocIdPos(0), + _l4SkipL1SkipPos(0), + _l4SkipL2SkipPos(0), + _l4SkipL3SkipPos(0), + _l4SkipFeaturesPos(0), + _features_size(0), + _counts(), + _residue(0) +{ +} + +Zc4PostingReaderBase::~Zc4PostingReaderBase() +{ +} + +void +Zc4PostingReaderBase::read_common_word_doc_id(DecodeContext64Base &decode_context) +{ + if ((_zcDocIds._valI >= _zcDocIds._valE) && _has_more) { + read_word_start(decode_context); // Read start of next chunk + } + // Split docid & features. 
+ assert(_zcDocIds._valI < _zcDocIds._valE); + uint32_t docIdPos = _zcDocIds.pos(); + uint32_t docId = _prev_doc_id + 1 + _zcDocIds.decode(); + _prev_doc_id = docId; + assert(docId <= _last_doc_id); + if (docId > _l1SkipDocId) { + _l1SkipDocIdPos += _l1Skip.decode() + 1; + assert(docIdPos == _l1SkipDocIdPos); + uint64_t featuresPos = decode_context.getReadOffset(); + if (_posting_params._encode_features) { + _l1SkipFeaturesPos += _l1Skip.decode() + 1; + assert(featuresPos == _l1SkipFeaturesPos); + } + (void) featuresPos; + if (docId > _l2SkipDocId) { + _l2SkipDocIdPos += _l2Skip.decode() + 1; + assert(docIdPos == _l2SkipDocIdPos); + if (_posting_params._encode_features) { + _l2SkipFeaturesPos += _l2Skip.decode() + 1; + assert(featuresPos == _l2SkipFeaturesPos); + } + _l2SkipL1SkipPos += _l2Skip.decode() + 1; + assert(_l1Skip.pos() == _l2SkipL1SkipPos); + if (docId > _l3SkipDocId) { + _l3SkipDocIdPos += _l3Skip.decode() + 1; + assert(docIdPos == _l3SkipDocIdPos); + if (_posting_params._encode_features) { + _l3SkipFeaturesPos += _l3Skip.decode() + 1; + assert(featuresPos == _l3SkipFeaturesPos); + } + _l3SkipL1SkipPos += _l3Skip.decode() + 1; + assert(_l1Skip.pos() == _l3SkipL1SkipPos); + _l3SkipL2SkipPos += _l3Skip.decode() + 1; + assert(_l2Skip.pos() == _l3SkipL2SkipPos); + if (docId > _l4SkipDocId) { + _l4SkipDocIdPos += _l4Skip.decode() + 1; + assert(docIdPos == _l4SkipDocIdPos); + (void) docIdPos; + if (_posting_params._encode_features) { + _l4SkipFeaturesPos += _l4Skip.decode() + 1; + assert(featuresPos == _l4SkipFeaturesPos); + } + _l4SkipL1SkipPos += _l4Skip.decode() + 1; + assert(_l1Skip.pos() == _l4SkipL1SkipPos); + _l4SkipL2SkipPos += _l4Skip.decode() + 1; + assert(_l2Skip.pos() == _l4SkipL2SkipPos); + _l4SkipL3SkipPos += _l4Skip.decode() + 1; + assert(_l3Skip.pos() == _l4SkipL3SkipPos); + _l4SkipDocId += _l4Skip.decode() + 1; + assert(_l4SkipDocId <= _last_doc_id); + assert(_l4SkipDocId >= docId); + } + _l3SkipDocId += _l3Skip.decode() + 1; + 
assert(_l3SkipDocId <= _last_doc_id); + assert(_l3SkipDocId <= _l4SkipDocId); + assert(_l3SkipDocId >= docId); + } + _l2SkipDocId += _l2Skip.decode() + 1; + assert(_l2SkipDocId <= _last_doc_id); + assert(_l2SkipDocId <= _l4SkipDocId); + assert(_l2SkipDocId <= _l3SkipDocId); + assert(_l2SkipDocId >= docId); + } + _l1SkipDocId += _l1Skip.decode() + 1; + assert(_l1SkipDocId <= _last_doc_id); + assert(_l1SkipDocId <= _l4SkipDocId); + assert(_l1SkipDocId <= _l3SkipDocId); + assert(_l1SkipDocId <= _l2SkipDocId); + assert(_l1SkipDocId >= docId); + } + if (docId < _last_doc_id) { + // Assert more space available when not yet at last docid + assert(_zcDocIds._valI < _zcDocIds._valE); + } else { + // Assert that space has been used when at last docid + assert(_zcDocIds._valI == _zcDocIds._valE); + // Assert that we've read to end of skip info + assert(_l1SkipDocId == _last_doc_id); + assert(_l2SkipDocId == _last_doc_id); + assert(_l3SkipDocId == _last_doc_id); + assert(_l4SkipDocId == _last_doc_id); + if (!_has_more) { + _chunkNo = 0; + } + } +} + +void +Zc4PostingReaderBase::read_word_start_with_skip(DecodeContext64Base &decode_context, const Zc4PostingHeader &header) +{ + if (_has_more) { + ++_chunkNo; + } else { + _chunkNo = 0; + } + assert(_num_docs >= _posting_params._min_skip_docs || _has_more); + bool has_more = header._has_more; + if (_has_more || has_more) { + assert(has_more == (_chunkNo + 1 < _counts._segments.size())); + assert(_num_docs == _counts._segments[_chunkNo]._numDocs); + if (has_more) { + assert(_num_docs >= _posting_params._min_skip_docs); + assert(_num_docs >= _posting_params._min_chunk_docs); + } + } else { + assert(_num_docs >= _posting_params._min_skip_docs); + assert(_num_docs == _counts._numDocs); + } + uint32_t docIdsSize = header._doc_ids_size; + uint32_t l1SkipSize = header._l1_skip_size; + uint32_t l2SkipSize = header._l2_skip_size; + uint32_t l3SkipSize = header._l3_skip_size; + uint32_t l4SkipSize = header._l4_skip_size; + if (_has_more || 
has_more) { + assert(_last_doc_id == _counts._segments[_chunkNo]._lastDoc); + } + _zcDocIds.clearReserve(docIdsSize); + _l1Skip.clearReserve(l1SkipSize); + _l2Skip.clearReserve(l2SkipSize); + _l3Skip.clearReserve(l3SkipSize); + _l4Skip.clearReserve(l4SkipSize); + decode_context.readBytes(_zcDocIds._valI, docIdsSize); + _zcDocIds._valE = _zcDocIds._valI + docIdsSize; + if (l1SkipSize > 0) { + decode_context.readBytes(_l1Skip._valI, l1SkipSize); + } + _l1Skip._valE = _l1Skip._valI + l1SkipSize; + if (l2SkipSize > 0) { + decode_context.readBytes(_l2Skip._valI, l2SkipSize); + } + _l2Skip._valE = _l2Skip._valI + l2SkipSize; + if (l3SkipSize > 0) { + decode_context.readBytes(_l3Skip._valI, l3SkipSize); + } + _l3Skip._valE = _l3Skip._valI + l3SkipSize; + if (l4SkipSize > 0) { + decode_context.readBytes(_l4Skip._valI, l4SkipSize); + } + _l4Skip._valE = _l4Skip._valI + l4SkipSize; + + if (l1SkipSize > 0) { + _l1SkipDocId = _l1Skip.decode() + 1 + _prev_doc_id; + } else { + _l1SkipDocId = _last_doc_id; + } + if (l2SkipSize > 0) { + _l2SkipDocId = _l2Skip.decode() + 1 + _prev_doc_id; + } else { + _l2SkipDocId = _last_doc_id; + } + if (l3SkipSize > 0) { + _l3SkipDocId = _l3Skip.decode() + 1 + _prev_doc_id; + } else { + _l3SkipDocId = _last_doc_id; + } + if (l4SkipSize > 0) { + _l4SkipDocId = _l4Skip.decode() + 1 + _prev_doc_id; + } else { + _l4SkipDocId = _last_doc_id; + } + _l1SkipDocIdPos = 0; + _l1SkipFeaturesPos = decode_context.getReadOffset(); + _l2SkipDocIdPos = 0; + _l2SkipL1SkipPos = 0; + _l2SkipFeaturesPos = decode_context.getReadOffset(); + _l3SkipDocIdPos = 0; + _l3SkipL1SkipPos = 0; + _l3SkipL2SkipPos = 0; + _l3SkipFeaturesPos = decode_context.getReadOffset(); + _l4SkipDocIdPos = 0; + _l4SkipL1SkipPos = 0; + _l4SkipL2SkipPos = 0; + _l4SkipL3SkipPos = 0; + _l4SkipFeaturesPos = decode_context.getReadOffset(); + _has_more = has_more; + // Decode context is now positioned at start of features +} + +void +Zc4PostingReaderBase::read_word_start(DecodeContext64Base 
&decode_context) +{ + Zc4PostingHeader header; + header._has_more = _has_more; + header.read(decode_context, _posting_params); + _num_docs = header._num_docs; + _residue = _num_docs; + _prev_doc_id = _has_more ? _last_doc_id : 0u; + _doc_id_k = header._doc_id_k; + _last_doc_id = header._last_doc_id; + _features_size = header._features_size; + assert(_num_docs <= _counts._numDocs); + assert(_num_docs == _counts._numDocs || + _num_docs >= _posting_params._min_chunk_docs || + _has_more); + + if (_num_docs >= _posting_params._min_skip_docs || _has_more) { + read_word_start_with_skip(decode_context, header); + } +} + +void +Zc4PostingReaderBase::set_counts(DecodeContext64Base &decode_context, const PostingListCounts &counts) +{ + assert(!_has_more && _residue == 0); // Previous words must have been read. + _counts = counts; + assert((_counts._numDocs == 0) == (_counts._bitLength == 0)); + if (_counts._numDocs > 0) { + read_word_start(decode_context); + } +} + +} diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.h new file mode 100644 index 00000000000..f19823936ba --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.h @@ -0,0 +1,79 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "zc4_posting_params.h" +#include "zcbuf.h" +#include <vespa/searchlib/bitcompression/compression.h> +#include <vespa/searchlib/index/postinglistcounts.h> + +namespace search::diskindex { + +class Zc4PostingHeader; + +/* + * Base class for reading posting lists that might have basic skip info. 
+ */ +class Zc4PostingReaderBase +{ + +protected: + uint32_t _doc_id_k; + uint32_t _prev_doc_id; // Previous document id + uint32_t _num_docs; // Documents in chunk or word + search::ComprFileReadContext _readContext; + bool _has_more; + Zc4PostingParams _posting_params; + uint32_t _last_doc_id; // last document in chunk or word + + ZcBuf _zcDocIds; // Document id deltas + ZcBuf _l1Skip; // L1 skip info + ZcBuf _l2Skip; // L2 skip info + ZcBuf _l3Skip; // L3 skip info + ZcBuf _l4Skip; // L4 skip info + + uint64_t _numWords; // Number of words in file + uint32_t _chunkNo; // Chunk number + + // Variables for validating skip information while reading + uint32_t _l1SkipDocId; + uint32_t _l1SkipDocIdPos; + uint64_t _l1SkipFeaturesPos; + uint32_t _l2SkipDocId; + uint32_t _l2SkipDocIdPos; + uint32_t _l2SkipL1SkipPos; + uint64_t _l2SkipFeaturesPos; + uint32_t _l3SkipDocId; + uint32_t _l3SkipDocIdPos; + uint32_t _l3SkipL1SkipPos; + uint32_t _l3SkipL2SkipPos; + uint64_t _l3SkipFeaturesPos; + uint32_t _l4SkipDocId; + uint32_t _l4SkipDocIdPos; + uint32_t _l4SkipL1SkipPos; + uint32_t _l4SkipL2SkipPos; + uint32_t _l4SkipL3SkipPos; + uint64_t _l4SkipFeaturesPos; + + // Variable for validating chunk information while reading + uint64_t _features_size; + index::PostingListCounts _counts; + + uint32_t _residue; // Number of unread documents after word header + void read_common_word_doc_id(bitcompression::DecodeContext64Base &decode_context); + void read_word_start_with_skip(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingHeader &header); + void read_word_start(bitcompression::DecodeContext64Base &decode_context); +public: + Zc4PostingReaderBase(bool dynamic_k); + Zc4PostingReaderBase(const Zc4PostingReaderBase &) = delete; + Zc4PostingReaderBase(Zc4PostingReaderBase &&) = delete; + Zc4PostingReaderBase &operator=(const Zc4PostingReaderBase &) = delete; + Zc4PostingReaderBase &operator=(Zc4PostingReaderBase &&) = delete; + ~Zc4PostingReaderBase(); + void 
read_doc_id_and_features(index::DocIdAndFeatures &features); + void set_counts(bitcompression::DecodeContext64Base &decode_context, const index::PostingListCounts &counts); + ComprFileReadContext &get_read_context() { return _readContext; } + Zc4PostingParams &get_posting_params() { return _posting_params; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp index 477db7095ed..78d18cb5550 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp @@ -153,11 +153,11 @@ Zc4PostingWriter<bigEndian>::write_docid_and_features(const DocIdAndFeatures &fe uint64_t writeOffset = _encode_features->getWriteOffset(); uint64_t featureSize = writeOffset - _featureOffset; assert(static_cast<uint32_t>(featureSize) == featureSize); - _docIds.push_back(std::make_pair(features._docId, + _docIds.push_back(std::make_pair(features.doc_id(), static_cast<uint32_t>(featureSize))); _featureOffset = writeOffset; } else { - _docIds.push_back(std::make_pair(features._docId, uint32_t(0))); + _docIds.push_back(std::make_pair(features.doc_id(), uint32_t(0))); } } diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp index 513c542637d..07b4da8a85f 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp @@ -7,15 +7,15 @@ LOG_SETUP(".index.docidandfeatures"); namespace search::index { DocIdAndFeatures::DocIdAndFeatures() - : _docId(0), - _wordDocFeatures(), + : _doc_id(0), _elements(), - _wordPositions(), + _word_positions(), _blob(), - _bitOffset(0u), - _bitLength(0u), - _raw(false) -{ } + _bit_offset(0u), + _bit_length(0u), + _has_raw_data(false) +{ +} DocIdAndFeatures::DocIdAndFeatures(const DocIdAndFeatures &) = default; DocIdAndFeatures & 
DocIdAndFeatures::operator = (const DocIdAndFeatures &) = default; diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.h b/searchlib/src/vespa/searchlib/index/docidandfeatures.h index d1d44f78aa6..a063712a79e 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.h +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.h @@ -7,57 +7,25 @@ namespace search::index { -/* +/** * The following feature classes are not self contained. To reduce * memory allocator pressure, the DocIdAndFeatures class contains a * flattened representation of the features at different levels. */ -/* - * (word, doc) features. - * - * Present as member in DocIdAndFeatures. - */ -class WordDocFeatures { -public: - // TODO: add support for user features - - WordDocFeatures() { } - void clear() { } -}; - -/* - * (word, doc, field) features. - * - * Present as vector element in DocIdAndFeatures. - */ -class WordDocFieldFeatures { -public: - uint32_t _numElements; // Number of array indexes - // TODO: add support for user features - - WordDocFieldFeatures() - : _numElements(0u) - {} - - uint32_t getNumElements() const { return _numElements; } - void setNumElements(uint32_t numElements) { _numElements = numElements; } - void incNumElements() { ++_numElements; } -}; - -/* - * (word, doc, field, element) features. +/** + * (word, doc, element) features. * * Present as vector element in DocIdAndFeatures. */ class WordDocElementFeatures { -public: +private: uint32_t _elementId; // Array index uint32_t _numOccs; int32_t _weight; uint32_t _elementLen; - // TODO: add support for user features +public: WordDocElementFeatures() : _elementId(0u), _numOccs(0u), @@ -93,16 +61,16 @@ public: void incNumOccs() { ++_numOccs; } }; -/* - * (word, doc, field, element, wordpos) features. +/** + * (word, doc, element, wordpos) features. * * Present as vector element in DocIdAndFeatures. 
*/ class WordDocElementWordPosFeatures { -public: +private: uint32_t _wordPos; - // TODO: add support for user features +public: WordDocElementWordPosFeatures() : _wordPos(0u) {} @@ -116,30 +84,27 @@ public: }; /** - * Class for minimal common representation of features available for a - * (word, doc) pair, used by index fusion to shuffle information from + * Class for minimal common representation of features available for a (word, doc) pair. + * + * Used in memory index and disk index posting lists and by index fusion to shuffle information from * input files to the output file without having to know all the details. */ class DocIdAndFeatures { public: - uint32_t _docId; // Current Docid - // generic feature data, flattened to avoid excessive allocator usage - WordDocFeatures _wordDocFeatures; + using RawData = std::vector<uint64_t>; + +protected: + uint32_t _doc_id; // Current document id std::vector<WordDocElementFeatures> _elements; - std::vector<WordDocElementWordPosFeatures> _wordPositions; -#ifdef notyet - // user blobs (packed) - UserFeatures _userFeatures; - // TODO: Determine how to handle big endian versus little endian user - // features, and whether set of user features is contiguous in file or - // interleaved with predefined features (word position, word weight) -#endif - // raw data (file format specific, packed) - std::vector<uint64_t> _blob; // Feature data for (word, docid) pair - uint32_t _bitOffset; // Offset of feature start ([0..63]) - uint32_t _bitLength; // Length of features - bool _raw; // + std::vector<WordDocElementWordPosFeatures> _word_positions; + // Raw data (file format specific, packed) + RawData _blob; // Feature data for (word, docid) pair + uint32_t _bit_offset; // Offset of feature start ([0..63]) + uint32_t _bit_length; // Length of features + bool _has_raw_data; + +public: DocIdAndFeatures(); DocIdAndFeatures(const DocIdAndFeatures &); DocIdAndFeatures & operator = (const DocIdAndFeatures &); @@ -147,37 +112,49 @@ public: 
DocIdAndFeatures & operator = (DocIdAndFeatures &&) = default; ~DocIdAndFeatures(); - void clearFeatures() { - _wordDocFeatures.clear(); + void clear_features() { _elements.clear(); - _wordPositions.clear(); - _bitOffset = 0u; - _bitLength = 0u; + _word_positions.clear(); + _bit_offset = 0u; + _bit_length = 0u; _blob.clear(); } - void clearFeatures(uint32_t bitOffset) { - _wordDocFeatures.clear(); + void clear_features(uint32_t bit_offset) { _elements.clear(); - _wordPositions.clear(); - _bitOffset = bitOffset; - _bitLength = 0u; + _word_positions.clear(); + _bit_offset = bit_offset; + _bit_length = 0u; _blob.clear(); } - void clear(uint32_t docId) { - _docId = docId; - clearFeatures(); + void clear(uint32_t doc_id) { + _doc_id = doc_id; + clear_features(); } - void clear(uint32_t docId, uint32_t bitOffset) { - _docId = docId; - clearFeatures(bitOffset); + void clear(uint32_t doc_id, uint32_t bit_offset) { + _doc_id = doc_id; + clear_features(bit_offset); } - void setRaw(bool raw) { _raw = raw; } - bool getRaw() const { return _raw; } + uint32_t doc_id() const { return _doc_id; } + void set_doc_id(uint32_t val) { _doc_id = val; } + + const std::vector<WordDocElementFeatures>& elements() const { return _elements; } + std::vector<WordDocElementFeatures>& elements() { return _elements; } + + const std::vector<WordDocElementWordPosFeatures>& word_positions() const { return _word_positions; } + std::vector<WordDocElementWordPosFeatures>& word_positions() { return _word_positions; } + + const RawData& blob() const { return _blob; } + RawData& blob() { return _blob; } + uint32_t bit_offset() const { return _bit_offset; } + uint32_t bit_length() const { return _bit_length; } + void set_bit_length(uint32_t val) { _bit_length = val; } + bool has_raw_data() const { return _has_raw_data; } + void set_has_raw_data(bool val) { _has_raw_data = val; } }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp 
b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp index 974fcc01c36..1d55ed76a09 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp @@ -21,7 +21,7 @@ FeatureStore::writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &featur oldOffset = 0; assert(_f.getWriteOffset() == oldOffset); } - assert(!features.getRaw()); + assert(!features.has_raw_data()); _f.writeFeatures(features); return oldOffset; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 447187e5af7..e79cab28dec 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -171,7 +171,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder) EntryRef featureRef(pitr.getData()); _featureStore.setupForReadFeatures(featureRef, decoder); decoder.readFeatures(features); - features._docId = docId; + features.set_doc_id(docId); indexBuilder.add_document(features); } } else { @@ -183,7 +183,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder) EntryRef featureRef(kd->getData()); _featureStore.setupForReadFeatures(featureRef, decoder); decoder.readFeatures(features); - features._docId = docId; + features.set_doc_id(docId); indexBuilder.add_document(features); } } diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp index 28c5b1fa5df..1e25878a33e 100644 --- a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp +++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp @@ -43,9 +43,9 @@ struct Builder addDoc(uint32_t docId) { _features.clear(docId); - _features._elements.emplace_back(0, 1, 1); - _features._elements.back().setNumOccs(1); - _features._wordPositions.emplace_back(0); + _features.elements().emplace_back(0, 1, 1); + 
_features.elements().back().setNumOccs(1); + _features.word_positions().emplace_back(0); _ib.add_document(_features); } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp index 9cbbd136148..d59417a1e78 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp @@ -206,7 +206,7 @@ FakeMemTreeOccMgr::add(uint32_t wordIdx, index::DocIdAndFeatures &features) _featureSizes[wordIdx] += RefType::align((r.second + 7) / 8) * 8; - _unflushed.push_back(PendingOp(wordIdx, features._docId, r.first)); + _unflushed.push_back(PendingOp(wordIdx, features.doc_id(), r.first)); if (_unflushed.size() >= 10000) flush(); diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp index 1fa518af28f..8f6c16658c9 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp @@ -584,7 +584,7 @@ FakeWord::validate(FieldReader &fieldReader, for (residue = numDocs; residue > 0; --residue) { assert(fieldReader._wordNum == wordNum); DocIdAndFeatures &features(fieldReader._docIdAndFeatures); - docId = features._docId; + docId = features.doc_id(); assert(d != de); assert(d->_docId == docId); if (matchData.valid()) { @@ -598,15 +598,15 @@ FakeWord::validate(FieldReader &fieldReader, typedef WordDocElementWordPosFeatures Positions; std::vector<Elements>::const_iterator element = - features._elements.begin(); + features.elements().begin(); std::vector<Positions>::const_iterator position = - features._wordPositions.begin(); + features.word_positions().begin(); TermFieldMatchData *tfmd = matchData[0]; assert(tfmd != 0); - tfmd->reset(features._docId); + tfmd->reset(features.doc_id()); - uint32_t elementResidue = features._elements.size(); + uint32_t elementResidue = features.elements().size(); 
while (elementResidue != 0) { uint32_t positionResidue = element->getNumOccs(); while (positionResidue != 0) { diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index 25997faae24..f6c6e5a64f3 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -211,7 +211,7 @@ FakeZcFilterOcc::read_header(bool doFeatures, bool dynamicK, uint32_t min_skip_d decode_context.setPosition({ _compressed.first, 0 }); Zc4PostingParams params(min_skip_docs, min_chunk_docs, _docIdLimit, dynamicK, doFeatures); Zc4PostingHeader header; - header.read<bigEndian>(decode_context, params); + header.read(decode_context, params); _docIdsSize = header._doc_ids_size; _l1SkipSize = header._l1_skip_size; _l2SkipSize = header._l2_skip_size; @@ -267,16 +267,16 @@ FakeZcFilterOcc::validate_read(const FakeWord &fw, bool encode_features, bool dy check_features.clear(doc._docId); } reader.read_doc_id_and_features(features); - assert(features._docId == doc._docId); - assert(features._elements.size() == check_features._elements.size()); - assert(features._wordPositions.size() == check_features._wordPositions.size()); + assert(features.doc_id() == doc._docId); + assert(features.elements().size() == check_features.elements().size()); + assert(features.word_positions().size() == check_features.word_positions().size()); ++hits; } if (encode_features) { assert(word_pos_iterator == word_pos_iterator_end); } reader.read_doc_id_and_features(features); - assert(static_cast<int32_t>(features._docId) == -1); + assert(static_cast<int32_t>(features.doc_id()) == -1); } FakeZcFilterOcc::~FakeZcFilterOcc() @@ -429,7 +429,7 @@ FakeFilterOccZCArrayIterator::initRange(uint32_t begin, uint32_t end) DecodeContext &d = _decodeContext; Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false); Zc4PostingHeader header; - header.read<true>(d, 
params); + header.read(d, params); assert((d.getBitOffset() & 7) == 0); const uint8_t *bcompr = d.getByteCompr(); _valI = bcompr; @@ -650,7 +650,7 @@ initRange(uint32_t begin, uint32_t end) DecodeContext &d = _decodeContext; Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false); Zc4PostingHeader header; - header.read<true>(d, params); + header.read(d, params); _lastDocId = header._last_doc_id; assert((d.getBitOffset() & 7) == 0); const uint8_t *bcompr = d.getByteCompr(); diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h index 08473f9fc6c..a341e36045e 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h @@ -53,9 +53,9 @@ public: _ss << "a=" << docId; if (_verbose) { _ss << "("; - auto wpi = features._wordPositions.begin(); + auto wpi = features.word_positions().begin(); bool firstElement = true; - for (auto &el : features._elements) { + for (auto &el : features.elements()) { if (!firstElement) { _ss << ","; } diff --git a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java index ca99598c1fb..7d39c0d54e0 100644 --- a/security-utils/src/main/java/com/yahoo/security/KeyUtils.java +++ b/security-utils/src/main/java/com/yahoo/security/KeyUtils.java @@ -3,8 +3,11 @@ package com.yahoo.security; import org.bouncycastle.asn1.ASN1Encodable; import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.eac.ECDSAPublicKey; import org.bouncycastle.asn1.pkcs.PrivateKeyInfo; +import org.bouncycastle.asn1.x509.SubjectPublicKeyInfo; import org.bouncycastle.jcajce.provider.asymmetric.ec.BCECPrivateKey; +import org.bouncycastle.jce.provider.BouncyCastleProvider; import org.bouncycastle.jce.spec.ECParameterSpec; import 
org.bouncycastle.jce.spec.ECPublicKeySpec; import org.bouncycastle.math.ec.ECPoint; @@ -20,14 +23,18 @@ import java.io.StringReader; import java.io.StringWriter; import java.io.UncheckedIOException; import java.security.GeneralSecurityException; +import java.security.InvalidKeyException; import java.security.KeyFactory; import java.security.KeyPair; import java.security.KeyPairGenerator; +import java.security.NoSuchAlgorithmException; import java.security.PrivateKey; import java.security.PublicKey; +import java.security.Signature; import java.security.interfaces.RSAPrivateCrtKey; import java.security.spec.PKCS8EncodedKeySpec; import java.security.spec.RSAPublicKeySpec; +import java.security.spec.X509EncodedKeySpec; import java.util.ArrayList; import java.util.List; @@ -109,6 +116,30 @@ public class KeyUtils { } } + public static PublicKey fromPemEncodedPublicKey(String pem) { + try (PEMParser parser = new PEMParser(new StringReader(pem))) { + List<Object> unknownObjects = new ArrayList<>(); + Object pemObject; + while ((pemObject = parser.readObject()) != null) { + SubjectPublicKeyInfo keyInfo; + if (pemObject instanceof SubjectPublicKeyInfo) { + keyInfo = (SubjectPublicKeyInfo) pemObject; + } else if (pemObject instanceof PEMKeyPair) { + PEMKeyPair pemKeypair = (PEMKeyPair) pemObject; + keyInfo = pemKeypair.getPublicKeyInfo(); + } else { + unknownObjects.add(pemObject); + continue; + } + JcaPEMKeyConverter pemConverter = new JcaPEMKeyConverter().setProvider(BouncyCastleProviderHolder.getInstance()); + return pemConverter.getPublicKey(keyInfo); + } + throw new IllegalArgumentException("Expected a public key, but found " + unknownObjects.toString()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + public static String toPem(PrivateKey privateKey) { try (StringWriter stringWriter = new StringWriter(); JcaPEMWriter pemWriter = new JcaPEMWriter(stringWriter)) { String algorithm = privateKey.getAlgorithm(); @@ -130,11 +161,42 @@ public class 
KeyUtils { } private static byte[] getPkcs1Bytes(PrivateKey privateKey) throws IOException{ - byte[] privBytes = privateKey.getEncoded(); PrivateKeyInfo pkInfo = PrivateKeyInfo.getInstance(privBytes); ASN1Encodable encodable = pkInfo.parsePrivateKey(); ASN1Primitive primitive = encodable.toASN1Primitive(); return primitive.getEncoded(); } + + /** Returns a signature instance which computes a SHA-256 hash of its content, before signing with the given private key. */ + public static Signature createSigner(PrivateKey key) { + try { + Signature signer = Signature.getInstance(SignatureAlgorithm.SHA256_WITH_ECDSA.getAlgorithmName(), + BouncyCastleProviderHolder.getInstance()); + signer.initSign(key); + return signer; + } + catch (NoSuchAlgorithmException e) { + throw new IllegalStateException(e); + } + catch (InvalidKeyException e) { + throw new IllegalArgumentException(e); + } + } + + /** Returns a signature instance which computes a SHA-256 hash of its content, before verifying with the given public key. 
*/ + public static Signature createVerifier(PublicKey key) { + try { + Signature signer = Signature.getInstance(SignatureAlgorithm.SHA256_WITH_ECDSA.getAlgorithmName(), + BouncyCastleProviderHolder.getInstance()); + signer.initVerify(key); + return signer; + } + catch (NoSuchAlgorithmException e) { + throw new IllegalStateException(e); + } + catch (InvalidKeyException e) { + throw new IllegalArgumentException(e); + } + } } diff --git a/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java b/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java index 5e786654d7c..3a7480dfc63 100644 --- a/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java +++ b/security-utils/src/test/java/com/yahoo/security/KeyUtilsTest.java @@ -17,6 +17,34 @@ import static org.junit.Assert.assertThat; */ public class KeyUtilsTest { + private static final String rsaPemPublicKey = "-----BEGIN PUBLIC KEY-----\n" + + "MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAsKL8jvIEy2peLtEvyhWW\n" + + "b/O/9RHTfPXjeXahXmVrXE4zY5CJ6Mf1PFkwQ8K8S35YhSbOZM4aYhF9V8F4jwyW\n" + + "nX6qWUMrWVHOuS32fkjdNo0z/KxCbG5nRIWLuv/PkHNuIJqMCbwn6Qud5a+wxeLg\n" + + "LqlroCtUJKAGj4YlZ5i8oMdCqfHKl/DMwcks5XxtIArz6GcM2z8fOB3NRexj32MU\n" + + "LH7ybWhCDx/RSqGQYJ8sWEFIK4HSmYqwqIQpFAm/ixISkeWBL6ikgqchZNMf7xyn\n" + + "yJxjCHgtkxANsQhHj2kgAzLDeBsuM+/WRhBGa+LRvEcuu/zZv9+7eVhpaYJveLVd\n" + + "cwPewW/8liBmKIzj/QPCn7ZlVRk094TZD6TCER4+JFW9mo0vFD8S9o0zhMlckzCF\n" + + "4ZNNgyP9tI8Wecq25A+sUY5/WZNLi+mka/GnfPt97GrhM0YHb1M6t4nh1R437Nwh\n" + + "rUHR/YDazbBvLk5T71GgfQfn44L9SwsqEYaHvdZAfV0IZJBtDo/yCe/yvgtHTymB\n" + + "eBrRMpBU5recPtW8bgEWlHl6Qyduw9EBJjNYxvBpgV/D/tNBcau0aGxmhwpBevet\n" + + "ekV6XA2miC7rWu2Wrq2l5LjXEgZOD5PNN2vQS2Cdet9JHYWbVbK3mBLgoChcC5Xo\n" + + "/QHLU4RydI0i0+Z2/tjGsGsCAwEAAQ==\n" + + "-----END PUBLIC KEY-----\n"; + + /** Generated from the private key above with {@code openssl ec -pubout -in private_key.pem -out public_key.pem} */ + private static final String ecPemPublicKey = "-----BEGIN PUBLIC 
KEY-----\n" + + "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEuKVFA8dXk43kVfYKzkUqhEY2rDT9\n" + + "z/4jKSTHwbYR8wdsOSrJGVEUPbS2nguIJ64OJH7gFnxM6sxUVj+Nm2HlXw==\n" + + "-----END PUBLIC KEY-----\n"; + + /** Generated with {@code openssl ecparam -name prime256v1 -genkey -noout -out private_key.pem} */ + private static final String ecPemPrivateKey = "-----BEGIN EC PRIVATE KEY-----\n" + + "MHcCAQEEIJUmbIX8YFLHtpRgkwqDDE3igU9RG6JD9cYHWAZii9j7oAoGCCqGSM49\n" + + "AwEHoUQDQgAEuKVFA8dXk43kVfYKzkUqhEY2rDT9z/4jKSTHwbYR8wdsOSrJGVEU\n" + + "PbS2nguIJ64OJH7gFnxM6sxUVj+Nm2HlXw==\n" + + "-----END EC PRIVATE KEY-----\n"; + @Test public void can_extract_public_key_from_rsa_private() { KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.RSA); @@ -51,4 +79,15 @@ public class KeyUtilsTest { assertEquals(keyPair.getPrivate(), deserializedKey); } -}
\ No newline at end of file + @Test + public void can_deserialize_rsa_publickey_in_pem_format() { + KeyUtils.fromPemEncodedPublicKey(rsaPemPublicKey); + } + + @Test + public void can_deserialize_ec_keys_in_pem_format() { + KeyUtils.fromPemEncodedPublicKey(ecPemPublicKey); + KeyUtils.fromPemEncodedPrivateKey(ecPemPrivateKey); + } + +} diff --git a/tenant-cd/src/main/java/com/yahoo/vespa/tenant/cd/ProductionTest.java b/tenant-cd/src/main/java/ai/vespa/hosted/cd/ProductionTest.java index beffff029b3..a756b665c1a 100644 --- a/tenant-cd/src/main/java/com/yahoo/vespa/tenant/cd/ProductionTest.java +++ b/tenant-cd/src/main/java/ai/vespa/hosted/cd/ProductionTest.java @@ -1,5 +1,5 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.tenant.cd; +package ai.vespa.hosted.cd; public class ProductionTest { diff --git a/tenant-cd/src/main/java/com/yahoo/vespa/tenant/cd/StagingTest.java b/tenant-cd/src/main/java/ai/vespa/hosted/cd/StagingTest.java index 9a287e7f8ff..789b9deadb0 100644 --- a/tenant-cd/src/main/java/com/yahoo/vespa/tenant/cd/StagingTest.java +++ b/tenant-cd/src/main/java/ai/vespa/hosted/cd/StagingTest.java @@ -1,5 +1,5 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.tenant.cd; +package ai.vespa.hosted.cd; public class StagingTest { diff --git a/tenant-cd/src/main/java/com/yahoo/vespa/tenant/cd/SystemTest.java b/tenant-cd/src/main/java/ai/vespa/hosted/cd/SystemTest.java index 59464f05339..889acb8b9c4 100644 --- a/tenant-cd/src/main/java/com/yahoo/vespa/tenant/cd/SystemTest.java +++ b/tenant-cd/src/main/java/ai/vespa/hosted/cd/SystemTest.java @@ -1,5 +1,5 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.tenant.cd; +package ai.vespa.hosted.cd; public class SystemTest { diff --git a/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/SyncFeedClient.java b/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/SyncFeedClient.java index c1e7f9f04c5..8acdec334d0 100644 --- a/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/SyncFeedClient.java +++ b/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/SyncFeedClient.java @@ -66,10 +66,14 @@ public class SyncFeedClient implements AutoCloseable { } public SyncOperation(String documentId, CharSequence documentData, Object context) { + this(documentId, documentData, new BigInteger(64, ThreadLocalRandom.current()).toString(32), context); + } + + public SyncOperation(String documentId, CharSequence documentData, String operationId, Object context) { this.documentId = Objects.requireNonNull(documentId, "documentId"); this.documentData = Objects.requireNonNull(documentData, "documentData"); this.context = context; - this.operationId = new BigInteger(64, ThreadLocalRandom.current()).toString(32); + this.operationId = Objects.requireNonNull(operationId); } } diff --git a/vespa-http-client/src/test/java/com/yahoo/vespa/http/client/SyncFeedClientTest.java b/vespa-http-client/src/test/java/com/yahoo/vespa/http/client/SyncFeedClientTest.java index fce356e6677..a2d5b18999e 100644 --- a/vespa-http-client/src/test/java/com/yahoo/vespa/http/client/SyncFeedClientTest.java +++ b/vespa-http-client/src/test/java/com/yahoo/vespa/http/client/SyncFeedClientTest.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.http.client.SyncFeedClient.SyncOperation; import com.yahoo.vespa.http.client.SyncFeedClient.SyncResult; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertNull; @@ -72,16 +73,38 @@ public class SyncFeedClientTest { " \"title\": \"Title 4\"" + " }" + "}")); + 
operations.add(new SyncOperation("id::test::4", + "{" + + " \"put\": \"id::test::4\"," + + " \"fields\": {" + + " \"title\": \"Title 4\"" + + " }" + + "}", "opId_4", null)); + operations.add(new SyncOperation("id::test::4", // Another operation for the same document + "{" + + " \"put\": \"id::test::4\"," + + " \"fields\": {" + + " \"title\": \"Title 44\"" + + " }" + + "}", "opId_44", null)); SyncResult result = feedClient.stream(operations); assertTrue(result.isSuccess()); - assertEquals(4, result.results().size()); + assertEquals(6, result.results().size()); assertNull(result.exception()); assertEquals("id::test::1", result.results().get(0).getDocumentId()); assertEquals("id::test::2", result.results().get(1).getDocumentId()); assertEquals("id::test::3", result.results().get(2).getDocumentId()); assertEquals("id::test::3", result.results().get(3).getDocumentId()); + assertEquals("id::test::4", result.results().get(4).getDocumentId()); + assertEquals("id::test::4", result.results().get(5).getDocumentId()); + assertEquals("opId_4", result.results().get(4).getOperationId()); + assertEquals("opId_44", result.results().get(5).getOperationId()); + assertTrue(result.results().get(4).getDocumentDataAsCharSequence().toString().contains("\"Title 4\"")); + assertTrue(result.results().get(5).getDocumentDataAsCharSequence().toString().contains("\"Title 44\"")); + + result.results().forEach(r -> assertNotNull(r.getOperationId())); } } diff --git a/vespa-maven-plugin/.gitignore b/vespa-maven-plugin/.gitignore new file mode 100644 index 00000000000..12251442258 --- /dev/null +++ b/vespa-maven-plugin/.gitignore @@ -0,0 +1,2 @@ +/target +/pom.xml.build diff --git a/vespa-maven-plugin/OWNERS b/vespa-maven-plugin/OWNERS new file mode 100644 index 00000000000..d0a102ecbf4 --- /dev/null +++ b/vespa-maven-plugin/OWNERS @@ -0,0 +1 @@ +jonmv diff --git a/vespa-maven-plugin/README.md b/vespa-maven-plugin/README.md new file mode 100644 index 00000000000..ee98d721981 --- /dev/null +++ 
b/vespa-maven-plugin/README.md @@ -0,0 +1,3 @@ +# Vespa application plugin + +Maven Plugin for deploying a Vespa application package. diff --git a/vespa-maven-plugin/pom.xml b/vespa-maven-plugin/pom.xml new file mode 100644 index 00000000000..4516c57f32a --- /dev/null +++ b/vespa-maven-plugin/pom.xml @@ -0,0 +1,49 @@ +<?xml version="1.0"?> +<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>com.yahoo.vespa</groupId> + <artifactId>parent</artifactId> + <version>7-SNAPSHOT</version> + <relativePath>../parent/pom.xml</relativePath> + </parent> + <artifactId>vespa-maven-plugin</artifactId> + <description>Maven Plugin for deploying a Vespa application package</description> + <packaging>maven-plugin</packaging> + + <prerequisites> + <maven>3.5.0</maven> + </prerequisites> + + <dependencies> + <dependency> + <groupId>org.apache.maven</groupId> + <artifactId>maven-plugin-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.maven.plugin-tools</groupId> + <artifactId>maven-plugin-annotations</artifactId> + </dependency> + <dependency> + <groupId>org.apache.maven</groupId> + <artifactId>maven-model</artifactId> + </dependency> + <dependency> + <groupId>org.apache.maven</groupId> + <artifactId>maven-artifact</artifactId> + </dependency> + <dependency> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + </dependencies> +</project> diff --git a/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestProfile.java 
b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestProfile.java index b7d3a06f30d..018acb17387 100644 --- a/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestProfile.java +++ b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestProfile.java @@ -6,9 +6,9 @@ package com.yahoo.vespa.hosted.testrunner; */ enum TestProfile { - SYSTEM_TEST("com.yahoo.vespa.tenant.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest", true), - STAGING_TEST("com.yahoo.vespa.tenant.cd.StagingTest, com.yahoo.vespa.tenant.systemtest.base.StagingTest", true), - PRODUCTION_TEST("com.yahoo.vespa.tenant.cd.ProductionTest, com.yahoo.vespa.tenant.systemtest.base.ProductionTest", false); + SYSTEM_TEST("ai.vespa.hosted.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest", true), + STAGING_TEST("ai.vespa.hosted.cd.StagingTest, com.yahoo.vespa.tenant.systemtest.base.StagingTest", true), + PRODUCTION_TEST("ai.vespa.hosted.cd.ProductionTest, com.yahoo.vespa.tenant.systemtest.base.ProductionTest", false); private final String group; private final boolean failIfNoTests; diff --git a/vespa-testrunner-components/src/test/resources/pom.xml_system_tests b/vespa-testrunner-components/src/test/resources/pom.xml_system_tests index 22382b84316..cd6db4dee39 100644 --- a/vespa-testrunner-components/src/test/resources/pom.xml_system_tests +++ b/vespa-testrunner-components/src/test/resources/pom.xml_system_tests @@ -47,10 +47,10 @@ <dependenciesToScan> <dependency>com.yahoo.vespa.testrunner.test:main.jar</dependency> </dependenciesToScan> - <groups>com.yahoo.vespa.tenant.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest</groups> + <groups>ai.vespa.hosted.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest</groups> <excludedGroups>com.yahoo.vespa.tenant.systemtest.base.impl.EmptyExcludeGroup.class</excludedGroups> <excludes> - <exclude>com.yahoo.vespa.tenant.cd.SystemTest, 
com.yahoo.vespa.tenant.systemtest.base.SystemTest</exclude> + <exclude>ai.vespa.hosted.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest</exclude> </excludes> <reportsDirectory>${env.TEST_DIR}</reportsDirectory> <redirectTestOutputToFile>false</redirectTestOutputToFile> |