diff options
Diffstat (limited to 'searchcore/src')
5 files changed, 12 insertions, 320 deletions
diff --git a/searchcore/src/vespa/searchcore/config/CMakeLists.txt b/searchcore/src/vespa/searchcore/config/CMakeLists.txt index 3d62309161c..e31300e51c8 100644 --- a/searchcore/src/vespa/searchcore/config/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/config/CMakeLists.txt @@ -3,10 +3,6 @@ vespa_add_library(searchcore_fconfig STATIC SOURCES DEPENDS ) -vespa_generate_config(searchcore_fconfig partitions.def) -install_config_definition(partitions.def vespa.config.search.core.partitions.def) -vespa_generate_config(searchcore_fconfig fdispatchrc.def) -install_config_definition(fdispatchrc.def vespa.config.search.core.fdispatchrc.def) vespa_generate_config(searchcore_fconfig proton.def) install_config_definition(proton.def vespa.config.search.core.proton.def) vespa_generate_config(searchcore_fconfig ranking-constants.def) diff --git a/searchcore/src/vespa/searchcore/config/fdispatchrc.def b/searchcore/src/vespa/searchcore/config/fdispatchrc.def deleted file mode 100644 index f9464815f6a..00000000000 --- a/searchcore/src/vespa/searchcore/config/fdispatchrc.def +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.search.core - -## Default bias used when calculating docsum slowness limit. The -## formula used is: factor * Average Docsum Time + bias. If a node -## uses more time than this limit, it is considered slow. This config -## value may be overridden per dataset by using the slowdocsumlimitbias -## keyword in the partitions file. -defaultslowdocsumlimitbias double default=1.0 restart - -## Default factor used when calculating docsum slowness limit. The -## formula used is: factor * Average Docsum Time + bias. If a node -## uses more time than this limit, it is considered slow. This config -## value may be overridden per dataset by using the slowdocsumlimitfactor -## keyword in the partitions file. -defaultslowdocsumlimitfactor double default=2.0 restart - -## Default bias used when calculating query slowness limit. The -## formula used is: factor * Average Search Time + bias. If a node -## uses more time than this limit, it is considered slow. This config -## value may be overridden per dataset by using the slowquerylimitbias -## keyword in the partitions file. -defaultslowquerylimitbias double default=1.0 restart - -## Default factor used when calculating query slowness limit. The -## formula used is: factor * Average Search Time + bias. If a node -## uses more time than this limit, it is considered slow. This config -## value may be overridden per dataset by using the slowquerylimitfactor -## keyword in the partitions file. -defaultslowquerylimitfactor double default=2.0 restart - -## The port where FNET Remote Tools RPC service should be made available. -## If 0, fdispatch will not offer RPC service. -frtport int default=0 restart - -## Port for health reporting -healthport int default=0 restart - -## The maximum time between successful reads on a socket before timeout. -maxsocksilent double default=5.0 restart - -## The maximum number of threads used. 0 means no limit. -maxthreads int default=150 restart - -## The number of transport threads used when talking to search nodes. -transportthreads int default=1 restart - -## specifies the partition we provide upwards in a multi-level dispatch system. -partition int default=0 restart - -## specifies the port number for the persistent internal transport -## protocol provided for a multi-level dispatch system. If this value -## is 0, MLD service is not provided, and this process cannot be used -## by other fdispatch processes. -ptport int default=0 restart - -## The name of the upwards transport to be used. If empty, use -## the default transport. -## This config setting is unused and should be removed. -transport string default="" - -## If present, the TCP_NODELAY option is set on the persistent -## transport connections. This causes non-full packets to be sent even -## though previously sent data has not yet been acknowledged (e.g. due -## to the delayed ack feature present on various tcp stacks). -transportnodelay bool default=true restart - -## Minimum size of packets to compress (0 means no compression) -## -packetcompresslimit int default = 1024 restart - -## Compression level for packets -## -## Default value is 3 -packetcompresslevel int default = 3 restart - -## Compression type for packets -## -## Default is LZ4 -packetcompresstype enum {NONE, LZ4} default=LZ4 restart - -## Specifies at which level this dispatcher is in a multi-level dispatch system. -## The top-level dispatcher is at level 0. -dispatchlevel int default=0 restart diff --git a/searchcore/src/vespa/searchcore/config/partitions.def b/searchcore/src/vespa/searchcore/config/partitions.def deleted file mode 100644 index b9c3aabb37e..00000000000 --- a/searchcore/src/vespa/searchcore/config/partitions.def +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.search.core - -## each dataset must be identified by a unique id, preferably a small integer. -## note that the array index in the dataset[] array is never significant. -dataset[].id int - -## Define the cost of using a dataset. fdispatch will try to -## spread queries not specifying dataset over all datasets having a -## nonzero refcost to minimize the maximum active refcost. -dataset[].refcost int default=0 - -## Number of searchable copies on each node. -## Searchable copies - nodes down < 1 will trigger an estimate of coverage. -dataset[].searchablecopies long default=1 - -## Defines the number of bits used to encode the partition number -## internally on a dispatch node. The value must be in the range [1,8]. -## The default value is 6, allowing the numparts parameter to be up to 63. -dataset[].partbits int default=8 - -## Defines the number of bits used to encode the row number internally -## on a dispatch node. A nonzero value is needed to allow slightly -## different engines to be on the same partition. -dataset[].rowbits int default=0 - -## Defines the number of partitions below this fdispatch process. -## The fsearch and fdispatch processes contacted must provide a partition -## number in the range [ firstpart, firstpart + partitions - 1 ] -## The legal range for numparts is limited by the partbits parameter. -## For PLAIN datasets numparts must be positive. -dataset[].numparts int default=0 - -## Defines the lowest partition number accepted by this fdispatch -## process. -dataset[].firstpart int default=0 - -## Minimum number of partitions available for the dataset to be -## considered up. If any dataset is considered down then the HTTP -## interface on fdispatch is temporarily closed. A dispatch node below -## can represent multiple partitions, e.g. a dispatch node having 5 -## dispatch nodes below that each has 20 search nodes below can have -## 100 partitions. Setting minpartitions to 95 will then mean that at -## most 5 search nodes can be down before the top level dispatch node -## takes down it's HTTP interface. -dataset[].minpartitions int default=0 - -## Minimum number of good engines in a partition before the partition is -## used for queries. -dataset[].mpp int default=1 - -## Maximum number of nodes that can be down in a row while still using this row -## for queries when using the FIXEDROW query distribution. -dataset[].maxnodesdownperfixedrow int default=0 - -## Use simple roundrobin or adaptive based on latency. -dataset[].useroundrobinforfixedrow bool default=true - -## specifies where a fdispatch or fsearch process can be contacted. -## must be in the format hostname:port/id where /id is optional. -## Normally you have at least as many engine array members as -## the number of partitions specified. To reduce the impact of -## packet loss, multiple instances of the same host and port but -## with different ids can be used. -dataset[].engine[].name_and_port string - -## If a non-negative partition number is specified then the engine -## is hardwired to that partition number and considered to be down if -## the monitoring partition ID doesn't match (but see overridepartids). -dataset[].engine[].partid int default=0 - -## If rowbits is nonzero you can specify a rowid for each engine. -## Then engines with different rowid and the same partition number -## don't need to be completely identical. If the rowid can't be -## encoded within the number of rowbits then the behaviour is undefined. -dataset[].engine[].rowid int default=0 - -## If the engine is a dispatch node, the subdatasetid specifies -## which of the engine's datasets should be used. -dataset[].engine[].subdatasetid int default=0 - -## The refcost is used for load balancing between different engines -## in the same partition (column) in the same manner as the refcost -## parameter for datasets. A refcost of 0 means that the engine won't -## be selected as part of load balancing. -dataset[].engine[].refcost int default=1 - -## When set to true, overrides of the partition id reported by the node. -## This only affects the engine if the partid is specified. -dataset[].engine[].overridepartids bool default=true - -## Maximum number of hits that will be requested from a single node -## in this dataset. If not set, there is no limit. Using this option -## may help reduce network traffic when searching in datasets with big -## fan-out, but it will also result in incorrect and incomplete results; -## don't use it if you don't (really) mean it. -dataset[].maxhitspernode int default=2147483647 - -## When using this dataset as an estimation dataset -## only use estparts number of partitions instead of the full numparts. -## The partitions are randomly chosen and the estimate multiplied with -## numparts/estparts. If estparts=0 the entire dataset is used. -dataset[].estparts int default=0 - -## When using this dataset as an estimation dataset only consider -## the partitions with partition number below estpartcutoff. -## estpartcutoff=0 means no cutoff. Avoid using this parameter. -dataset[].estpartcutoff int default=0 - -## Minimum active requests before considering estimates. -dataset[].minactive int default=500 - -## Maximum active requests before queueing. -dataset[].maxactive int default=500 - -## Maximum active requests before cutoff. -dataset[].cutoffactive int default=1000 - -## Minimum estimated active requests before queueing. -dataset[].minestactive int default=500 - -## Maximum estimated active requests before early drop. -dataset[].maxestactive int default=1000 - -## Maximum estimated active requests (100% early drop). -dataset[].cutoffestactive int default=1000 - -## Maximum query queue drain rate. -dataset[].queuedrainrate double default=400 - -## Maximum queued queries drained at once. -dataset[].queuedrainmax double default=40 - -## Factor used when calculating query slowness limit. The formula -## used is: factor * Average Search Time + bias. If a node uses more -## time than this limit, it is considered slow. The default value is -## defined by the defaultslowquerylimitfactor config value in the -## fdispatchrc config. -dataset[].slowquerylimitfactor double default=0.0 - -## Bias used when calculating query slowness limit. -dataset[].slowquerylimitbias double default=100.0 - -## Factor used when calculating docsum slowness limit. The formula -## used is: factor * Average Docsum Time + bias. If a node uses more -## time than this limit, it is considered slow. The default value is -## defined by the defaultslowdocsumlimitfactor config value in the -## fdispatchrc config. -dataset[].slowdocsumlimitfactor double default=0 - -## Bias used when calculating docsum slowness limit. -dataset[].slowdocsumlimitbias double default=100.0 - -## The number of seconds between sending monitor requests to the -## attached search nodes. -dataset[].monitorinterval double default=1.0 - -## The maximum number of seconds to wait for the resultset after -## minimal search coverage is reached. -dataset[].higher_coverage_maxsearchwait double default=1.0 - -## The minimum number of seconds to wait for the resultset while -## full search coverage is still not reached. -dataset[].higher_coverage_minsearchwait double default=0.0 - -## If less time than the base wait has elapsed then boost the timeouts -## above with the difference between the base wait and the elapsed -## time. This compensates somewhat for quick responses (e.g. cached -## response on most nodes) without impacting the timeouts for -## responses that are not so quick. -dataset[].higher_coverage_basesearchwait double default=0.1 - -## The minimum search coverage, as a percentage. -dataset[].minimal_searchcoverage double default=100.0 - -## The maximum number of seconds to wait for document summaries -## after minimum docsum coverage is reached. -dataset[].higher_coverage_maxdocsumwait double default=0.3 - -## The minimum number of seconds to wait for document summaries -## while full docsum coverage is still not reached. -dataset[].higher_coverage_mindocsumwait double default=0.1 - -## If less time than the base wait has elapsed then boost the timeouts -## above with the difference between the base wait and the elapsed -## time. This compensates somewhat for quick responses (e.g. cached -## response on most nodes) without impacting the timeouts for -## responses that are not so quick. -dataset[].higher_coverage_basedocsumwait double default=0.1 - -## The minimum docsum coverage, as a percentage. -dataset[].minimal_docsumcoverage double default=100.0 - -## If random, use standard load balancing. -## if deterministic, use deterministic query forwarding -## If auto, use deterministic when the frequence distribution of -## the queries are relatively well balanced, and fall back to -## standard load balancing when not. -dataset[].querydistribution enum { RANDOM, AUTOMATIC, FIXEDROW } default=AUTOMATIC - -## Minimum coverage for allowing a group to be considered for serving. -dataset[].min_group_coverage double default=100 - -## Required activedocs coverage for a group, as a percentage -dataset[].min_activedocs_coverage double default=97.0 - -## Decay rate used when loadbalancing between groups. -## Lower number will react faster to changes in cluster. -dataset[].latency_decay_rate double default=1000 diff --git a/searchcore/src/vespa/searchcore/grouping/mergingmanager.cpp b/searchcore/src/vespa/searchcore/grouping/mergingmanager.cpp index 6c0d201c28c..1fcb8a56acb 100644 --- a/searchcore/src/vespa/searchcore/grouping/mergingmanager.cpp +++ b/searchcore/src/vespa/searchcore/grouping/mergingmanager.cpp @@ -20,18 +20,14 @@ private: uint32_t _rowBits; uint32_t _partId; uint32_t _rowId; - bool _mld; public: typedef search::aggregation::FS4Hit FS4Hit; - PathMangler(uint32_t partBits, uint32_t rowBits, uint32_t partId, uint32_t rowId, bool mld) - : _partBits(partBits), _rowBits(rowBits), _partId(partId), _rowId(rowId), _mld(mld) {} + PathMangler(uint32_t partBits, uint32_t rowBits, uint32_t partId, uint32_t rowId) + : _partBits(partBits), _rowBits(rowBits), _partId(partId), _rowId(rowId) {} bool check(const vespalib::Identifiable &obj) const override; void execute(vespalib::Identifiable &obj) override __attribute__((noinline)); uint32_t computeNewPath(uint32_t path) const { - if (_mld) { - path = (path + 1) << _partBits; - } path += _partId; if (_rowBits > 0) { path = (path << _rowBits) + _rowId; @@ -70,18 +66,16 @@ MergingManager::~MergingManager() } void -MergingManager::addResult(uint32_t partId, uint32_t rowId, bool mld, +MergingManager::addResult(uint32_t partId, uint32_t rowId, const char *groupResult, size_t groupResultLen) { - _input.push_back(Entry(partId, rowId, mld, groupResult, groupResultLen)); + _input.push_back(Entry(partId, rowId, groupResult, groupResultLen)); } bool MergingManager::needMerge() const { if (_input.size() == 1) { - PathMangler pathMangler(_partBits, _rowBits, - _input[0].partId, _input[0].rowId, - _input[0].mld); + PathMangler pathMangler(_partBits, _rowBits, _input[0].partId, _input[0].rowId); if (pathMangler.computeNewPath(0) == 0) { return false; } @@ -110,7 +104,7 @@ namespace { void mergeOne(MAP & map, const MergingManager::Entry & input, uint32_t partBits, uint32_t rowBits) __attribute__((noinline)); void mergeOne(MAP & map, const MergingManager::Entry & input, uint32_t partBits, uint32_t rowBits) { - PathMangler pathMangler(partBits, rowBits, input.partId, input.rowId, input.mld); + PathMangler pathMangler(partBits, rowBits, input.partId, input.rowId); vespalib::nbostream is(input.data, input.length); vespalib::NBOSerializer nis(is); uint32_t cnt = 0; diff --git a/searchcore/src/vespa/searchcore/grouping/mergingmanager.h b/searchcore/src/vespa/searchcore/grouping/mergingmanager.h index ea2c62909b9..08349e8629a 100644 --- a/searchcore/src/vespa/searchcore/grouping/mergingmanager.h +++ b/searchcore/src/vespa/searchcore/grouping/mergingmanager.h @@ -4,8 +4,7 @@ #include <memory> #include <vector> -namespace search { -namespace grouping { +namespace search::grouping { /** * Wrapper class used to handle merging of grouping results. All input @@ -16,17 +15,16 @@ class MergingManager public: /** * Simple wrapper for all the grouping results from a single - * search/fdispatch node. + * search node. **/ struct Entry { uint32_t partId; uint32_t rowId; - bool mld; const char *data; size_t length; - Entry(uint32_t part, uint32_t row, bool m, const char *pt, size_t len) - : partId(part), rowId(row), mld(m), data(pt), length(len) {} + Entry(uint32_t part, uint32_t row, const char *pt, size_t len) + : partId(part), rowId(row), data(pt), length(len) {} }; private: @@ -61,11 +59,10 @@ public: * * @param partId which partition these results came from * @param rowId which row these results came from - * @param mld true if the node below is a dispatch node * @param groupSpec group spec * @param groupSpecLen length of the group spec **/ - void addResult(uint32_t partId, uint32_t rowId, bool mld, + void addResult(uint32_t partId, uint32_t rowId, const char *groupResult, size_t groupResultLen); /** @@ -98,6 +95,4 @@ public: char *stealGroupResult(); }; -} // namespace search::grouping -} // namespace search - +} |